version 2013-02-08 Rl
- fixes/optimization in multistrip encoding and codebook size choice,
quality/bitrate is now better than that of the binary proprietary encoder
version 2013-02-12 Rl
- separated codebook training sets, avoided the transfer of wasted bytes,
which yields both better quality and smaller files
- now using the correct colorspace (TODO: move conversion to libswscale)
version 2013-02-14 Rl "Valentine's Day" version:
- made strip division more robust
- minimized bruteforcing the number of strips,
(costs some R/D but speeds up compression a lot), the heuristic
assumption is that score as a function of the number of strips has
one wide minimum which moves slowly, of course not fully true
- simplified codebook generation,
the old code was meant for other optimizations than we actually do
- optimized the codebook generation / error estimation for MODE_MC
version 2013-04-28 Rl
- bugfixed codebook optimization logic
version 2014-01-20 Rl
- made the encoder compatible with vintage decoders
and added some yet unused code for possible future
incremental codebook updates
- fixed a small memory leak
version 2014-01-21 Rl
- believe it or not, now we get even smaller files, with better quality
(which means I missed an optimization earlier :)
Signed-off-by: Diego Biurrun <diego@biurrun.de>
... | ... |
@@ -1,6 +1,9 @@ |
1 | 1 |
/* |
2 | 2 |
* Cinepak encoder (c) 2011 Tomas Härdin |
3 | 3 |
* http://titan.codemill.se/~tomhar/cinepakenc.patch |
4 |
+ * |
|
5 |
+ * Fixes and improvements, vintage decoders compatibility |
|
6 |
+ * (c) 2013, 2014 Rl, Aetey Global Technologies AB |
|
4 | 7 |
|
5 | 8 |
Permission is hereby granted, free of charge, to any person obtaining a |
6 | 9 |
copy of this software and associated documentation files (the "Software"), |
... | ... |
@@ -20,13 +23,27 @@ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
20 | 20 |
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
21 | 21 |
OTHER DEALINGS IN THE SOFTWARE. |
22 | 22 |
|
23 |
+ * TODO: |
|
24 |
+ * - optimize: color space conversion (move conversion to libswscale), ... |
|
25 |
+ * - implement options to set the min/max number of strips? |
|
26 |
+ * MAYBE: |
|
27 |
+ * - "optimally" split the frame into several non-regular areas |
|
28 |
+ * using a separate codebook pair for each area and approximating |
|
29 |
+ * the area by several rectangular strips (generally not full width ones) |
|
30 |
+ * (use quadtree splitting? a simple fixed-granularity grid?) |
|
23 | 31 |
*/ |
24 | 32 |
|
33 |
+#include <string.h> |
|
34 |
+ |
|
25 | 35 |
#include "libavutil/common.h" |
36 |
+#include "libavutil/internal.h" |
|
26 | 37 |
#include "libavutil/intreadwrite.h" |
27 | 38 |
#include "avcodec.h" |
28 | 39 |
#include "libavutil/lfg.h" |
29 | 40 |
#include "elbg.h" |
41 |
+#include "internal.h" |
|
42 |
+ |
|
43 |
+#include "libavutil/avassert.h" |
|
30 | 44 |
|
31 | 45 |
#define CVID_HEADER_SIZE 10 |
32 | 46 |
#define STRIP_HEADER_SIZE 12 |
... | ... |
@@ -36,11 +53,20 @@ OTHER DEALINGS IN THE SOFTWARE. |
36 | 36 |
#define MB_AREA (MB_SIZE*MB_SIZE) |
37 | 37 |
|
38 | 38 |
#define VECTOR_MAX 6 //six or four entries per vector depending on format |
39 |
-#define CODEBOOK_MAX 256 |
|
40 |
-#define CODEBOOK_NUM 5 //five potential codebooks (1, 4, 16, 64, 256) for V1 and V4 |
|
39 |
+#define CODEBOOK_MAX 256 //size of a codebook |
|
41 | 40 |
|
42 |
-#define MAX_STRIPS 1 //Note: having fewer choices regarding the number of strip speeds up encoding (obviously) |
|
41 |
+//#define MAX_STRIPS 32 //Note: having fewer choices regarding the number of strips speeds up encoding (obviously) |
|
42 |
+#define MAX_STRIPS 3 // This seems to be max for vintage players! -- rl |
|
43 |
+// TODO: we might want to have a "vintage compatibility" switch |
|
43 | 44 |
#define MIN_STRIPS 1 //Note: having more strips speeds up encoding the frame (this is less obvious) |
45 |
+// MAX_STRIPS limits the maximum quality you can reach |
|
46 |
+// when you want high quality on high resolutions, |
|
47 |
+// MIN_STRIPS limits the minimum efficiently encodable bit rate |
|
48 |
+// on low resolutions |
|
49 |
+// the numbers are only used for brute force optimization for the first frame, |
|
50 |
+// for the following frames they are adaptively readjusted |
|
51 |
+// NOTE the decoder in ffmpeg has its own arbitrary limitation on the number |
|
52 |
+// of strips, currently 32 |
|
44 | 53 |
|
45 | 54 |
typedef enum { |
46 | 55 |
MODE_V1_ONLY = 0, |
... | ... |
@@ -53,42 +79,51 @@ typedef enum { |
53 | 53 |
typedef enum { |
54 | 54 |
ENC_V1, |
55 | 55 |
ENC_V4, |
56 |
- ENC_SKIP |
|
56 |
+ ENC_SKIP, |
|
57 |
+ |
|
58 |
+ ENC_UNCERTAIN |
|
57 | 59 |
} mb_encoding; |
58 | 60 |
|
59 | 61 |
typedef struct { |
60 | 62 |
int v1_vector; //index into v1 codebook |
61 | 63 |
int v1_error; //error when using V1 encoding |
62 |
- int v4_vector[CODEBOOK_NUM][4]; //indices into v4 codebooks |
|
63 |
- int v4_error[CODEBOOK_NUM]; //error when using V4 encodings |
|
64 |
+ int v4_vector[4]; //indices into v4 codebook |
|
65 |
+ int v4_error; //error when using V4 encoding |
|
64 | 66 |
int skip_error; //error when block is skipped (aka copied from last frame) |
65 | 67 |
mb_encoding best_encoding; //last result from calculate_mode_score() |
66 | 68 |
} mb_info; |
67 | 69 |
|
68 | 70 |
typedef struct { |
69 | 71 |
int v1_codebook[CODEBOOK_MAX*VECTOR_MAX]; |
70 |
- int *v4_codebook; |
|
72 |
+ int v4_codebook[CODEBOOK_MAX*VECTOR_MAX]; |
|
73 |
+ int v1_size; |
|
74 |
+ int v4_size; |
|
75 |
+ CinepakMode mode; |
|
71 | 76 |
} strip_info; |
72 | 77 |
|
73 | 78 |
typedef struct { |
74 | 79 |
AVCodecContext *avctx; |
75 |
- unsigned char *pict_bufs[3], *strip_buf, *frame_buf; |
|
76 |
- AVFrame last_frame; |
|
77 |
- AVFrame best_frame; |
|
78 |
- AVFrame scratch_frame; |
|
80 |
+ unsigned char *pict_bufs[4], *strip_buf, *frame_buf; |
|
81 |
+ AVFrame *last_frame; |
|
82 |
+ AVFrame *best_frame; |
|
83 |
+ AVFrame *scratch_frame; |
|
84 |
+ AVFrame *input_frame; |
|
79 | 85 |
enum AVPixelFormat pix_fmt; |
80 | 86 |
int w, h; |
87 |
+ int frame_buf_size; |
|
81 | 88 |
int curframe, keyint; |
82 | 89 |
AVLFG randctx; |
83 | 90 |
uint64_t lambda; |
84 | 91 |
int *codebook_input; |
85 | 92 |
int *codebook_closest; |
86 | 93 |
mb_info *mb; //MB RD state |
94 |
+ int min_strips; //the current limit |
|
95 |
+ int max_strips; //the current limit |
|
87 | 96 |
#ifdef CINEPAKENC_DEBUG |
88 | 97 |
mb_info *best_mb; //TODO: remove. only used for printing stats |
89 |
-#endif |
|
90 | 98 |
int num_v1_mode, num_v4_mode, num_mc_mode; |
91 | 99 |
int num_v1_encs, num_v4_encs, num_skips; |
100 |
+#endif |
|
92 | 101 |
} CinepakEncContext; |
93 | 102 |
|
94 | 103 |
static av_cold int cinepak_encode_init(AVCodecContext *avctx) |
... | ... |
@@ -102,20 +137,33 @@ static av_cold int cinepak_encode_init(AVCodecContext *avctx) |
102 | 102 |
return AVERROR(EINVAL); |
103 | 103 |
} |
104 | 104 |
|
105 |
- if (!(s->codebook_input = av_malloc(sizeof(int) * (avctx->pix_fmt == AV_PIX_FMT_YUV420P ? 6 : 4) * (avctx->width * avctx->height) >> 2))) |
|
105 |
+ if (!(s->last_frame = av_frame_alloc())) |
|
106 | 106 |
return AVERROR(ENOMEM); |
107 |
+ if (!(s->best_frame = av_frame_alloc())) |
|
108 |
+ goto enomem; |
|
109 |
+ if (!(s->scratch_frame = av_frame_alloc())) |
|
110 |
+ goto enomem; |
|
111 |
+ if (avctx->pix_fmt == AV_PIX_FMT_RGB24) |
|
112 |
+ if (!(s->input_frame = av_frame_alloc())) |
|
113 |
+ goto enomem; |
|
114 |
+ |
|
115 |
+ if (!(s->codebook_input = av_malloc(sizeof(int) * (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4) * (avctx->width * avctx->height) >> 2))) |
|
116 |
+ goto enomem; |
|
107 | 117 |
|
108 | 118 |
if (!(s->codebook_closest = av_malloc(sizeof(int) * (avctx->width * avctx->height) >> 2))) |
109 | 119 |
goto enomem; |
110 | 120 |
|
111 |
- for(x = 0; x < 3; x++) |
|
112 |
- if(!(s->pict_bufs[x] = av_malloc((avctx->pix_fmt == AV_PIX_FMT_YUV420P ? 6 : 4) * (avctx->width * avctx->height) >> 2))) |
|
121 |
+ for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++) |
|
122 |
+ if(!(s->pict_bufs[x] = av_malloc((avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4) * (avctx->width * avctx->height) >> 2))) |
|
113 | 123 |
goto enomem; |
114 | 124 |
|
115 | 125 |
mb_count = avctx->width * avctx->height / MB_AREA; |
116 | 126 |
|
117 |
- //the largest possible chunk is 0x31 with all MBs encoded in V4 mode, which is 34 bits per MB |
|
118 |
- strip_buf_size = STRIP_HEADER_SIZE + 3 * CHUNK_HEADER_SIZE + 2 * VECTOR_MAX * CODEBOOK_MAX + 4 * (mb_count + (mb_count + 15) / 16); |
|
127 |
+ //the largest possible chunk is 0x31 with all MBs encoded in V4 mode |
|
128 |
+ //and full codebooks being replaced in INTER mode, |
|
129 |
+ // which is 34 bits per MB |
|
130 |
+ //and 2*256 extra flag bits per strip |
|
131 |
+ strip_buf_size = STRIP_HEADER_SIZE + 3 * CHUNK_HEADER_SIZE + 2 * VECTOR_MAX * CODEBOOK_MAX + 4 * (mb_count + (mb_count + 15) / 16) + (2 * CODEBOOK_MAX)/8; |
|
119 | 132 |
|
120 | 133 |
frame_buf_size = CVID_HEADER_SIZE + MAX_STRIPS * strip_buf_size; |
121 | 134 |
|
... | ... |
@@ -137,113 +185,221 @@ static av_cold int cinepak_encode_init(AVCodecContext *avctx) |
137 | 137 |
s->avctx = avctx; |
138 | 138 |
s->w = avctx->width; |
139 | 139 |
s->h = avctx->height; |
140 |
+ s->frame_buf_size = frame_buf_size; |
|
140 | 141 |
s->curframe = 0; |
141 | 142 |
s->keyint = avctx->keyint_min; |
142 | 143 |
s->pix_fmt = avctx->pix_fmt; |
143 | 144 |
|
144 | 145 |
//set up AVFrames |
145 |
- s->last_frame.data[0] = s->pict_bufs[0]; |
|
146 |
- s->last_frame.linesize[0] = s->w; |
|
147 |
- s->best_frame.data[0] = s->pict_bufs[1]; |
|
148 |
- s->best_frame.linesize[0] = s->w; |
|
149 |
- s->scratch_frame.data[0] = s->pict_bufs[2]; |
|
150 |
- s->scratch_frame.linesize[0] = s->w; |
|
151 |
- |
|
152 |
- if(s->pix_fmt == AV_PIX_FMT_YUV420P) { |
|
153 |
- s->last_frame.data[1] = s->last_frame.data[0] + s->w * s->h; |
|
154 |
- s->last_frame.data[2] = s->last_frame.data[1] + ((s->w * s->h) >> 2); |
|
155 |
- s->last_frame.linesize[1] = s->last_frame.linesize[2] = s->w >> 1; |
|
156 |
- |
|
157 |
- s->best_frame.data[1] = s->best_frame.data[0] + s->w * s->h; |
|
158 |
- s->best_frame.data[2] = s->best_frame.data[1] + ((s->w * s->h) >> 2); |
|
159 |
- s->best_frame.linesize[1] = s->best_frame.linesize[2] = s->w >> 1; |
|
160 |
- |
|
161 |
- s->scratch_frame.data[1] = s->scratch_frame.data[0] + s->w * s->h; |
|
162 |
- s->scratch_frame.data[2] = s->scratch_frame.data[1] + ((s->w * s->h) >> 2); |
|
163 |
- s->scratch_frame.linesize[1] = s->scratch_frame.linesize[2] = s->w >> 1; |
|
146 |
+ s->last_frame->data[0] = s->pict_bufs[0]; |
|
147 |
+ s->last_frame->linesize[0] = s->w; |
|
148 |
+ s->best_frame->data[0] = s->pict_bufs[1]; |
|
149 |
+ s->best_frame->linesize[0] = s->w; |
|
150 |
+ s->scratch_frame->data[0] = s->pict_bufs[2]; |
|
151 |
+ s->scratch_frame->linesize[0] = s->w; |
|
152 |
+ |
|
153 |
+ if (s->pix_fmt == AV_PIX_FMT_RGB24) { |
|
154 |
+ s->last_frame->data[1] = s->last_frame->data[0] + s->w * s->h; |
|
155 |
+ s->last_frame->data[2] = s->last_frame->data[1] + ((s->w * s->h) >> 2); |
|
156 |
+ s->last_frame->linesize[1] = s->last_frame->linesize[2] = s->w >> 1; |
|
157 |
+ |
|
158 |
+ s->best_frame->data[1] = s->best_frame->data[0] + s->w * s->h; |
|
159 |
+ s->best_frame->data[2] = s->best_frame->data[1] + ((s->w * s->h) >> 2); |
|
160 |
+ s->best_frame->linesize[1] = s->best_frame->linesize[2] = s->w >> 1; |
|
161 |
+ |
|
162 |
+ s->scratch_frame->data[1] = s->scratch_frame->data[0] + s->w * s->h; |
|
163 |
+ s->scratch_frame->data[2] = s->scratch_frame->data[1] + ((s->w * s->h) >> 2); |
|
164 |
+ s->scratch_frame->linesize[1] = s->scratch_frame->linesize[2] = s->w >> 1; |
|
165 |
+ |
|
166 |
+ s->input_frame->data[0] = s->pict_bufs[3]; |
|
167 |
+ s->input_frame->linesize[0] = s->w; |
|
168 |
+ s->input_frame->data[1] = s->input_frame->data[0] + s->w * s->h; |
|
169 |
+ s->input_frame->data[2] = s->input_frame->data[1] + ((s->w * s->h) >> 2); |
|
170 |
+ s->input_frame->linesize[1] = s->input_frame->linesize[2] = s->w >> 1; |
|
164 | 171 |
} |
165 | 172 |
|
173 |
+ s->min_strips = MIN_STRIPS; |
|
174 |
+ s->max_strips = MAX_STRIPS; |
|
175 |
+ |
|
176 |
+#ifdef CINEPAKENC_DEBUG |
|
166 | 177 |
s->num_v1_mode = s->num_v4_mode = s->num_mc_mode = s->num_v1_encs = s->num_v4_encs = s->num_skips = 0; |
178 |
+#endif |
|
167 | 179 |
|
168 | 180 |
return 0; |
169 | 181 |
|
170 | 182 |
enomem: |
171 |
- av_free(s->codebook_input); |
|
172 |
- av_free(s->codebook_closest); |
|
173 |
- av_free(s->strip_buf); |
|
174 |
- av_free(s->frame_buf); |
|
175 |
- av_free(s->mb); |
|
183 |
+ av_frame_free(&s->last_frame); |
|
184 |
+ av_frame_free(&s->best_frame); |
|
185 |
+ av_frame_free(&s->scratch_frame); |
|
186 |
+ if (avctx->pix_fmt == AV_PIX_FMT_RGB24) |
|
187 |
+ av_frame_free(&s->input_frame); |
|
188 |
+ av_freep(&s->codebook_input); |
|
189 |
+ av_freep(&s->codebook_closest); |
|
190 |
+ av_freep(&s->strip_buf); |
|
191 |
+ av_freep(&s->frame_buf); |
|
192 |
+ av_freep(&s->mb); |
|
176 | 193 |
#ifdef CINEPAKENC_DEBUG |
177 |
- av_free(s->best_mb); |
|
194 |
+ av_freep(&s->best_mb); |
|
178 | 195 |
#endif |
179 | 196 |
|
180 |
- for(x = 0; x < 3; x++) |
|
181 |
- av_free(s->pict_bufs[x]); |
|
197 |
+ for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++) |
|
198 |
+ av_freep(&s->pict_bufs[x]); |
|
182 | 199 |
|
183 | 200 |
return AVERROR(ENOMEM); |
184 | 201 |
} |
185 | 202 |
|
186 |
-static int64_t calculate_mode_score(CinepakEncContext *s, CinepakMode mode, int h, int v1_size, int v4_size, int v4, strip_info *info) |
|
203 |
+static int64_t calculate_mode_score(CinepakEncContext *s, int h, strip_info *info, int report, int *training_set_v1_shrunk, int *training_set_v4_shrunk |
|
204 |
+#ifdef CINEPAK_REPORT_SERR |
|
205 |
+, int64_t *serr |
|
206 |
+#endif |
|
207 |
+) |
|
187 | 208 |
{ |
188 | 209 |
//score = FF_LAMBDA_SCALE * error + lambda * bits |
189 | 210 |
int x; |
190 |
- int entry_size = s->pix_fmt == AV_PIX_FMT_YUV420P ? 6 : 4; |
|
211 |
+ int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4; |
|
191 | 212 |
int mb_count = s->w * h / MB_AREA; |
192 | 213 |
mb_info *mb; |
193 | 214 |
int64_t score1, score2, score3; |
194 |
- int64_t ret = s->lambda * ((v1_size ? CHUNK_HEADER_SIZE + v1_size * entry_size : 0) + |
|
195 |
- (v4_size ? CHUNK_HEADER_SIZE + v4_size * entry_size : 0) + |
|
215 |
+ int64_t ret = s->lambda * ((info->v1_size ? CHUNK_HEADER_SIZE + info->v1_size * entry_size : 0) + |
|
216 |
+ (info->v4_size ? CHUNK_HEADER_SIZE + info->v4_size * entry_size : 0) + |
|
196 | 217 |
CHUNK_HEADER_SIZE) << 3; |
197 | 218 |
|
198 |
- //av_log(s->avctx, AV_LOG_INFO, "sizes %3i %3i -> %9li score mb_count %i", v1_size, v4_size, ret, mb_count); |
|
219 |
+ //av_log(s->avctx, AV_LOG_INFO, "sizes %3i %3i -> %9lli score mb_count %i", info->v1_size, info->v4_size, (long long int)ret, mb_count); |
|
220 |
+ |
|
221 |
+#ifdef CINEPAK_REPORT_SERR |
|
222 |
+ *serr = 0; |
|
223 |
+#endif |
|
199 | 224 |
|
200 |
- switch(mode) { |
|
225 |
+ switch(info->mode) { |
|
201 | 226 |
case MODE_V1_ONLY: |
202 | 227 |
//one byte per MB |
203 | 228 |
ret += s->lambda * 8 * mb_count; |
204 | 229 |
|
230 |
+// while calculating we assume all blocks are ENC_V1 |
|
205 | 231 |
for(x = 0; x < mb_count; x++) { |
206 | 232 |
mb = &s->mb[x]; |
207 | 233 |
ret += FF_LAMBDA_SCALE * mb->v1_error; |
234 |
+#ifdef CINEPAK_REPORT_SERR |
|
235 |
+ *serr += mb->v1_error; |
|
236 |
+#endif |
|
237 |
+// this function is never called for report in MODE_V1_ONLY |
|
238 |
+// if(!report) |
|
208 | 239 |
mb->best_encoding = ENC_V1; |
209 | 240 |
} |
210 | 241 |
|
211 | 242 |
break; |
212 | 243 |
case MODE_V1_V4: |
213 | 244 |
//9 or 33 bits per MB |
214 |
- for(x = 0; x < mb_count; x++) { |
|
215 |
- mb = &s->mb[x]; |
|
216 |
- score1 = s->lambda * 9 + FF_LAMBDA_SCALE * mb->v1_error; |
|
217 |
- score2 = s->lambda * 33 + FF_LAMBDA_SCALE * mb->v4_error[v4]; |
|
218 |
- |
|
219 |
- if(score1 <= score2) { |
|
245 |
+ if(report) { |
|
246 |
+// no moves between the corresponding training sets are allowed |
|
247 |
+ *training_set_v1_shrunk = *training_set_v4_shrunk = 0; |
|
248 |
+ for(x = 0; x < mb_count; x++) { |
|
249 |
+ int mberr; |
|
250 |
+ mb = &s->mb[x]; |
|
251 |
+ if(mb->best_encoding == ENC_V1) |
|
252 |
+ score1 = s->lambda * 9 + FF_LAMBDA_SCALE * (mberr=mb->v1_error); |
|
253 |
+ else |
|
254 |
+ score1 = s->lambda * 33 + FF_LAMBDA_SCALE * (mberr=mb->v4_error); |
|
220 | 255 |
ret += score1; |
221 |
- mb->best_encoding = ENC_V1; |
|
222 |
- } else { |
|
223 |
- ret += score2; |
|
224 |
- mb->best_encoding = ENC_V4; |
|
256 |
+#ifdef CINEPAK_REPORT_SERR |
|
257 |
+ *serr += mberr; |
|
258 |
+#endif |
|
259 |
+ } |
|
260 |
+ } else { // find best mode per block |
|
261 |
+ for(x = 0; x < mb_count; x++) { |
|
262 |
+ mb = &s->mb[x]; |
|
263 |
+ score1 = s->lambda * 9 + FF_LAMBDA_SCALE * mb->v1_error; |
|
264 |
+ score2 = s->lambda * 33 + FF_LAMBDA_SCALE * mb->v4_error; |
|
265 |
+ |
|
266 |
+ if(score1 <= score2) { |
|
267 |
+ ret += score1; |
|
268 |
+#ifdef CINEPAK_REPORT_SERR |
|
269 |
+ *serr += mb->v1_error; |
|
270 |
+#endif |
|
271 |
+ mb->best_encoding = ENC_V1; |
|
272 |
+ } else { |
|
273 |
+ ret += score2; |
|
274 |
+#ifdef CINEPAK_REPORT_SERR |
|
275 |
+ *serr += mb->v4_error; |
|
276 |
+#endif |
|
277 |
+ mb->best_encoding = ENC_V4; |
|
278 |
+ } |
|
225 | 279 |
} |
226 | 280 |
} |
227 | 281 |
|
228 | 282 |
break; |
229 | 283 |
case MODE_MC: |
230 | 284 |
//1, 10 or 34 bits per MB |
231 |
- for(x = 0; x < mb_count; x++) { |
|
232 |
- mb = &s->mb[x]; |
|
233 |
- score1 = s->lambda * 1 + FF_LAMBDA_SCALE * mb->skip_error; |
|
234 |
- score2 = s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error; |
|
235 |
- score3 = s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error[v4]; |
|
236 |
- |
|
237 |
- |
|
238 |
- if(score1 <= score2 && score1 <= score3) { |
|
239 |
- ret += score1; |
|
240 |
- mb->best_encoding = ENC_SKIP; |
|
241 |
- } else if(score2 <= score1 && score2 <= score3) { |
|
242 |
- ret += score2; |
|
243 |
- mb->best_encoding = ENC_V1; |
|
244 |
- } else { |
|
245 |
- ret += score3; |
|
246 |
- mb->best_encoding = ENC_V4; |
|
285 |
+ if(report) { |
|
286 |
+ int v1_shrunk = 0, v4_shrunk = 0; |
|
287 |
+ for(x = 0; x < mb_count; x++) { |
|
288 |
+ mb = &s->mb[x]; |
|
289 |
+// it is OK to move blocks to ENC_SKIP here |
|
290 |
+// but not to any codebook encoding! |
|
291 |
+ score1 = s->lambda * 1 + FF_LAMBDA_SCALE * mb->skip_error; |
|
292 |
+ if(mb->best_encoding == ENC_SKIP) { |
|
293 |
+ ret += score1; |
|
294 |
+#ifdef CINEPAK_REPORT_SERR |
|
295 |
+ *serr += mb->skip_error; |
|
296 |
+#endif |
|
297 |
+ } else if(mb->best_encoding == ENC_V1) { |
|
298 |
+ if((score2=s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error) >= score1) { |
|
299 |
+ mb->best_encoding = ENC_SKIP; |
|
300 |
+ ++v1_shrunk; |
|
301 |
+ ret += score1; |
|
302 |
+#ifdef CINEPAK_REPORT_SERR |
|
303 |
+ *serr += mb->skip_error; |
|
304 |
+#endif |
|
305 |
+ } else { |
|
306 |
+ ret += score2; |
|
307 |
+#ifdef CINEPAK_REPORT_SERR |
|
308 |
+ *serr += mb->v1_error; |
|
309 |
+#endif |
|
310 |
+ } |
|
311 |
+ } else { |
|
312 |
+ if((score3=s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error) >= score1) { |
|
313 |
+ mb->best_encoding = ENC_SKIP; |
|
314 |
+ ++v4_shrunk; |
|
315 |
+ ret += score1; |
|
316 |
+#ifdef CINEPAK_REPORT_SERR |
|
317 |
+ *serr += mb->skip_error; |
|
318 |
+#endif |
|
319 |
+ } else { |
|
320 |
+ ret += score3; |
|
321 |
+#ifdef CINEPAK_REPORT_SERR |
|
322 |
+ *serr += mb->v4_error; |
|
323 |
+#endif |
|
324 |
+ } |
|
325 |
+ } |
|
326 |
+ } |
|
327 |
+ *training_set_v1_shrunk = v1_shrunk; |
|
328 |
+ *training_set_v4_shrunk = v4_shrunk; |
|
329 |
+ } else { // find best mode per block |
|
330 |
+ for(x = 0; x < mb_count; x++) { |
|
331 |
+ mb = &s->mb[x]; |
|
332 |
+ score1 = s->lambda * 1 + FF_LAMBDA_SCALE * mb->skip_error; |
|
333 |
+ score2 = s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error; |
|
334 |
+ score3 = s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error; |
|
335 |
+ |
|
336 |
+ if(score1 <= score2 && score1 <= score3) { |
|
337 |
+ ret += score1; |
|
338 |
+#ifdef CINEPAK_REPORT_SERR |
|
339 |
+ *serr += mb->skip_error; |
|
340 |
+#endif |
|
341 |
+ mb->best_encoding = ENC_SKIP; |
|
342 |
+ } else if(score2 <= score3) { |
|
343 |
+ ret += score2; |
|
344 |
+#ifdef CINEPAK_REPORT_SERR |
|
345 |
+ *serr += mb->v1_error; |
|
346 |
+#endif |
|
347 |
+ mb->best_encoding = ENC_V1; |
|
348 |
+ } else { |
|
349 |
+ ret += score3; |
|
350 |
+#ifdef CINEPAK_REPORT_SERR |
|
351 |
+ *serr += mb->v4_error; |
|
352 |
+#endif |
|
353 |
+ mb->best_encoding = ENC_V4; |
|
354 |
+ } |
|
247 | 355 |
} |
248 | 356 |
} |
249 | 357 |
|
... | ... |
@@ -262,13 +418,45 @@ static int write_chunk_header(unsigned char *buf, int chunk_type, int chunk_size |
262 | 262 |
|
263 | 263 |
static int encode_codebook(CinepakEncContext *s, int *codebook, int size, int chunk_type_yuv, int chunk_type_gray, unsigned char *buf) |
264 | 264 |
{ |
265 |
- int x, y, ret, entry_size = s->pix_fmt == AV_PIX_FMT_YUV420P ? 6 : 4; |
|
266 |
- |
|
267 |
- ret = write_chunk_header(buf, s->pix_fmt == AV_PIX_FMT_YUV420P ? chunk_type_yuv : chunk_type_gray, entry_size * size); |
|
268 |
- |
|
269 |
- for(x = 0; x < size; x++) |
|
270 |
- for(y = 0; y < entry_size; y++) |
|
271 |
- buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0); |
|
265 |
+ int x, y, ret, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4; |
|
266 |
+ int incremental_codebook_replacement_mode = 0; // hardcoded here, |
|
267 |
+ // the compiler should notice that this is a constant -- rl |
|
268 |
+ |
|
269 |
+ ret = write_chunk_header(buf, |
|
270 |
+ s->pix_fmt == AV_PIX_FMT_RGB24 ? |
|
271 |
+ chunk_type_yuv+(incremental_codebook_replacement_mode?1:0) : |
|
272 |
+ chunk_type_gray+(incremental_codebook_replacement_mode?1:0), |
|
273 |
+ entry_size * size |
|
274 |
+ + (incremental_codebook_replacement_mode?(size+31)/32*4:0) ); |
|
275 |
+ |
|
276 |
+// we do codebook encoding according to the "intra" mode |
|
277 |
+// but we keep the "dead" code for reference in case we will want |
|
278 |
+// to use incremental codebook updates (which actually would give us |
|
279 |
+// "kind of" motion compensation, especially in 1 strip/frame case) -- rl |
|
280 |
+// (of course, the code will be not useful as-is) |
|
281 |
+ if(incremental_codebook_replacement_mode) { |
|
282 |
+ int flags = 0; |
|
283 |
+ int flagsind; |
|
284 |
+ for(x = 0; x < size; x++) { |
|
285 |
+ if(flags == 0) { |
|
286 |
+ flagsind = ret; |
|
287 |
+ ret += 4; |
|
288 |
+ flags = 0x80000000; |
|
289 |
+ } else |
|
290 |
+ flags = ((flags>>1) | 0x80000000); |
|
291 |
+ for(y = 0; y < entry_size; y++) |
|
292 |
+ buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0); |
|
293 |
+ if((flags&0xffffffff) == 0xffffffff) { |
|
294 |
+ AV_WB32(&buf[flagsind], flags); |
|
295 |
+ flags = 0; |
|
296 |
+ } |
|
297 |
+ } |
|
298 |
+ if(flags) |
|
299 |
+ AV_WB32(&buf[flagsind], flags); |
|
300 |
+ } else |
|
301 |
+ for(x = 0; x < size; x++) |
|
302 |
+ for(y = 0; y < entry_size; y++) |
|
303 |
+ buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0); |
|
272 | 304 |
|
273 | 305 |
return ret; |
274 | 306 |
} |
... | ... |
@@ -279,7 +467,7 @@ static void get_sub_picture(CinepakEncContext *s, int x, int y, AVPicture *in, A |
279 | 279 |
out->data[0] = in->data[0] + x + y * in->linesize[0]; |
280 | 280 |
out->linesize[0] = in->linesize[0]; |
281 | 281 |
|
282 |
- if(s->pix_fmt == AV_PIX_FMT_YUV420P) { |
|
282 |
+ if(s->pix_fmt == AV_PIX_FMT_RGB24) { |
|
283 | 283 |
out->data[1] = in->data[1] + (x >> 1) + (y >> 1) * in->linesize[1]; |
284 | 284 |
out->linesize[1] = in->linesize[1]; |
285 | 285 |
|
... | ... |
@@ -289,47 +477,47 @@ static void get_sub_picture(CinepakEncContext *s, int x, int y, AVPicture *in, A |
289 | 289 |
} |
290 | 290 |
|
291 | 291 |
//decodes the V1 vector in mb into the 4x4 MB pointed to by sub_pict |
292 |
-static void decode_v1_vector(CinepakEncContext *s, AVPicture *sub_pict, mb_info *mb, strip_info *info) |
|
292 |
+static void decode_v1_vector(CinepakEncContext *s, AVPicture *sub_pict, int v1_vector, strip_info *info) |
|
293 | 293 |
{ |
294 |
- int entry_size = s->pix_fmt == AV_PIX_FMT_YUV420P ? 6 : 4; |
|
294 |
+ int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4; |
|
295 | 295 |
|
296 | 296 |
sub_pict->data[0][0] = |
297 | 297 |
sub_pict->data[0][1] = |
298 | 298 |
sub_pict->data[0][ sub_pict->linesize[0]] = |
299 |
- sub_pict->data[0][1+ sub_pict->linesize[0]] = info->v1_codebook[mb->v1_vector*entry_size]; |
|
299 |
+ sub_pict->data[0][1+ sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size]; |
|
300 | 300 |
|
301 | 301 |
sub_pict->data[0][2] = |
302 | 302 |
sub_pict->data[0][3] = |
303 | 303 |
sub_pict->data[0][2+ sub_pict->linesize[0]] = |
304 |
- sub_pict->data[0][3+ sub_pict->linesize[0]] = info->v1_codebook[mb->v1_vector*entry_size+1]; |
|
304 |
+ sub_pict->data[0][3+ sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size+1]; |
|
305 | 305 |
|
306 | 306 |
sub_pict->data[0][2*sub_pict->linesize[0]] = |
307 | 307 |
sub_pict->data[0][1+2*sub_pict->linesize[0]] = |
308 | 308 |
sub_pict->data[0][ 3*sub_pict->linesize[0]] = |
309 |
- sub_pict->data[0][1+3*sub_pict->linesize[0]] = info->v1_codebook[mb->v1_vector*entry_size+2]; |
|
309 |
+ sub_pict->data[0][1+3*sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size+2]; |
|
310 | 310 |
|
311 | 311 |
sub_pict->data[0][2+2*sub_pict->linesize[0]] = |
312 | 312 |
sub_pict->data[0][3+2*sub_pict->linesize[0]] = |
313 | 313 |
sub_pict->data[0][2+3*sub_pict->linesize[0]] = |
314 |
- sub_pict->data[0][3+3*sub_pict->linesize[0]] = info->v1_codebook[mb->v1_vector*entry_size+3]; |
|
314 |
+ sub_pict->data[0][3+3*sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size+3]; |
|
315 | 315 |
|
316 |
- if(s->pix_fmt == AV_PIX_FMT_YUV420P) { |
|
316 |
+ if(s->pix_fmt == AV_PIX_FMT_RGB24) { |
|
317 | 317 |
sub_pict->data[1][0] = |
318 | 318 |
sub_pict->data[1][1] = |
319 | 319 |
sub_pict->data[1][ sub_pict->linesize[1]] = |
320 |
- sub_pict->data[1][1+ sub_pict->linesize[1]] = info->v1_codebook[mb->v1_vector*entry_size+4]; |
|
320 |
+ sub_pict->data[1][1+ sub_pict->linesize[1]] = info->v1_codebook[v1_vector*entry_size+4]; |
|
321 | 321 |
|
322 | 322 |
sub_pict->data[2][0] = |
323 | 323 |
sub_pict->data[2][1] = |
324 | 324 |
sub_pict->data[2][ sub_pict->linesize[2]] = |
325 |
- sub_pict->data[2][1+ sub_pict->linesize[2]] = info->v1_codebook[mb->v1_vector*entry_size+5]; |
|
325 |
+ sub_pict->data[2][1+ sub_pict->linesize[2]] = info->v1_codebook[v1_vector*entry_size+5]; |
|
326 | 326 |
} |
327 | 327 |
} |
328 | 328 |
|
329 | 329 |
//decodes the V4 vectors in mb into the 4x4 MB pointed to by sub_pict |
330 | 330 |
static void decode_v4_vector(CinepakEncContext *s, AVPicture *sub_pict, int *v4_vector, strip_info *info) |
331 | 331 |
{ |
332 |
- int i, x, y, entry_size = s->pix_fmt == AV_PIX_FMT_YUV420P ? 6 : 4; |
|
332 |
+ int i, x, y, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4; |
|
333 | 333 |
|
334 | 334 |
for(i = y = 0; y < 4; y += 2) { |
335 | 335 |
for(x = 0; x < 4; x += 2, i++) { |
... | ... |
@@ -338,7 +526,7 @@ static void decode_v4_vector(CinepakEncContext *s, AVPicture *sub_pict, int *v4_ |
338 | 338 |
sub_pict->data[0][x + (y+1)*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+2]; |
339 | 339 |
sub_pict->data[0][x+1 + (y+1)*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+3]; |
340 | 340 |
|
341 |
- if(s->pix_fmt == AV_PIX_FMT_YUV420P) { |
|
341 |
+ if(s->pix_fmt == AV_PIX_FMT_RGB24) { |
|
342 | 342 |
sub_pict->data[1][(x>>1) + (y>>1)*sub_pict->linesize[1]] = info->v4_codebook[v4_vector[i]*entry_size+4]; |
343 | 343 |
sub_pict->data[2][(x>>1) + (y>>1)*sub_pict->linesize[2]] = info->v4_codebook[v4_vector[i]*entry_size+5]; |
344 | 344 |
} |
... | ... |
@@ -346,39 +534,62 @@ static void decode_v4_vector(CinepakEncContext *s, AVPicture *sub_pict, int *v4_ |
346 | 346 |
} |
347 | 347 |
} |
348 | 348 |
|
349 |
-static int encode_mode(CinepakEncContext *s, CinepakMode mode, int h, int v1_size, int v4_size, int v4, AVPicture *scratch_pict, strip_info *info, unsigned char *buf) |
|
349 |
+static void copy_mb(CinepakEncContext *s, AVPicture *a, AVPicture *b) |
|
350 |
+{ |
|
351 |
+ int y, p; |
|
352 |
+ |
|
353 |
+ for(y = 0; y < MB_SIZE; y++) { |
|
354 |
+ memcpy(a->data[0]+y*a->linesize[0], b->data[0]+y*b->linesize[0], |
|
355 |
+ MB_SIZE); |
|
356 |
+ } |
|
357 |
+ |
|
358 |
+ if(s->pix_fmt == AV_PIX_FMT_RGB24) { |
|
359 |
+ for(p = 1; p <= 2; p++) { |
|
360 |
+ for(y = 0; y < MB_SIZE/2; y++) { |
|
361 |
+ memcpy(a->data[p] + y*a->linesize[p], |
|
362 |
+ b->data[p] + y*b->linesize[p], |
|
363 |
+ MB_SIZE/2); |
|
364 |
+ } |
|
365 |
+ } |
|
366 |
+ } |
|
367 |
+} |
|
368 |
+ |
|
369 |
+static int encode_mode(CinepakEncContext *s, int h, AVPicture *scratch_pict, AVPicture *last_pict, strip_info *info, unsigned char *buf) |
|
350 | 370 |
{ |
351 | 371 |
int x, y, z, flags, bits, temp_size, header_ofs, ret = 0, mb_count = s->w * h / MB_AREA; |
352 | 372 |
int needs_extra_bit, should_write_temp; |
353 | 373 |
unsigned char temp[64]; //32/2 = 16 V4 blocks at 4 B each -> 64 B |
354 | 374 |
mb_info *mb; |
355 |
- AVPicture sub_scratch; |
|
375 |
+ AVPicture sub_scratch, sub_last; |
|
356 | 376 |
|
357 | 377 |
//encode codebooks |
358 |
- if(v1_size) |
|
359 |
- ret += encode_codebook(s, info->v1_codebook, v1_size, 0x22, 0x26, buf + ret); |
|
378 |
+////// MacOS vintage decoder compatibility dictates the presence of |
|
379 |
+////// the codebook chunk even when the codebook is empty - pretty dumb... |
|
380 |
+////// and also the certain order of the codebook chunks -- rl |
|
381 |
+// if(info->v4_size) |
|
382 |
+ ret += encode_codebook(s, info->v4_codebook, info->v4_size, 0x20, 0x24, buf + ret); |
|
360 | 383 |
|
361 |
- if(v4_size) |
|
362 |
- ret += encode_codebook(s, info->v4_codebook, v4_size, 0x20, 0x24, buf + ret); |
|
384 |
+// if(info->v1_size) |
|
385 |
+ ret += encode_codebook(s, info->v1_codebook, info->v1_size, 0x22, 0x26, buf + ret); |
|
363 | 386 |
|
364 | 387 |
//update scratch picture |
365 | 388 |
for(z = y = 0; y < h; y += MB_SIZE) { |
366 | 389 |
for(x = 0; x < s->w; x += MB_SIZE, z++) { |
367 | 390 |
mb = &s->mb[z]; |
368 | 391 |
|
369 |
- if(mode == MODE_MC && mb->best_encoding == ENC_SKIP) |
|
370 |
- continue; |
|
371 |
- |
|
372 | 392 |
get_sub_picture(s, x, y, scratch_pict, &sub_scratch); |
373 | 393 |
|
374 |
- if(mode == MODE_V1_ONLY || mb->best_encoding == ENC_V1) |
|
375 |
- decode_v1_vector(s, &sub_scratch, mb, info); |
|
376 |
- else if(mode != MODE_V1_ONLY && mb->best_encoding == ENC_V4) |
|
377 |
- decode_v4_vector(s, &sub_scratch, mb->v4_vector[v4], info); |
|
394 |
+ if(info->mode == MODE_MC && mb->best_encoding == ENC_SKIP) { |
|
395 |
+ get_sub_picture(s, x, y, last_pict, &sub_last); |
|
396 |
+ copy_mb(s, &sub_scratch, &sub_last); |
|
397 |
+ } else if(info->mode == MODE_V1_ONLY || mb->best_encoding == ENC_V1) |
|
398 |
+ decode_v1_vector(s, &sub_scratch, mb->v1_vector, info); |
|
399 |
+ else |
|
400 |
+ decode_v4_vector(s, &sub_scratch, mb->v4_vector, info); |
|
378 | 401 |
} |
379 | 402 |
} |
380 | 403 |
|
381 |
- switch(mode) { |
|
404 |
+ switch(info->mode) { |
|
382 | 405 |
case MODE_V1_ONLY: |
383 | 406 |
//av_log(s->avctx, AV_LOG_INFO, "mb_count = %i\n", mb_count); |
384 | 407 |
ret += write_chunk_header(buf + ret, 0x32, mb_count); |
... | ... |
@@ -408,7 +619,7 @@ static int encode_mode(CinepakEncContext *s, CinepakMode mode, int h, int v1_siz |
408 | 408 |
buf[ret++] = mb->v1_vector; |
409 | 409 |
else |
410 | 410 |
for(z = 0; z < 4; z++) |
411 |
- buf[ret++] = mb->v4_vector[v4][z]; |
|
411 |
+ buf[ret++] = mb->v4_vector[z]; |
|
412 | 412 |
} |
413 | 413 |
} |
414 | 414 |
|
... | ... |
@@ -456,7 +667,7 @@ static int encode_mode(CinepakEncContext *s, CinepakMode mode, int h, int v1_siz |
456 | 456 |
temp[temp_size++] = mb->v1_vector; |
457 | 457 |
else if(mb->best_encoding == ENC_V4) |
458 | 458 |
for(z = 0; z < 4; z++) |
459 |
- temp[temp_size++] = mb->v4_vector[v4][z]; |
|
459 |
+ temp[temp_size++] = mb->v4_vector[z]; |
|
460 | 460 |
|
461 | 461 |
if(should_write_temp) { |
462 | 462 |
memcpy(&buf[ret], temp, temp_size); |
... | ... |
@@ -492,7 +703,7 @@ static int compute_mb_distortion(CinepakEncContext *s, AVPicture *a, AVPicture * |
492 | 492 |
} |
493 | 493 |
} |
494 | 494 |
|
495 |
- if(s->pix_fmt == AV_PIX_FMT_YUV420P) { |
|
495 |
+ if(s->pix_fmt == AV_PIX_FMT_RGB24) { |
|
496 | 496 |
for(p = 1; p <= 2; p++) { |
497 | 497 |
for(y = 0; y < MB_SIZE/2; y++) { |
498 | 498 |
for(x = 0; x < MB_SIZE/2; x++) { |
... | ... |
@@ -506,19 +717,30 @@ static int compute_mb_distortion(CinepakEncContext *s, AVPicture *a, AVPicture * |
506 | 506 |
return ret; |
507 | 507 |
} |
508 | 508 |
|
509 |
-static int quantize(CinepakEncContext *s, int h, AVPicture *pict, int v1mode, int size, int v4, strip_info *info) |
|
509 |
+// return the possibly adjusted size of the codebook |
|
510 |
+#define CERTAIN(x) ((x)!=ENC_UNCERTAIN) |
|
511 |
+static int quantize(CinepakEncContext *s, int h, AVPicture *pict, |
|
512 |
+ int v1mode, strip_info *info, |
|
513 |
+ mb_encoding encoding) |
|
510 | 514 |
{ |
511 |
- int x, y, i, j, k, x2, y2, x3, y3, plane, shift; |
|
512 |
- int entry_size = s->pix_fmt == AV_PIX_FMT_YUV420P ? 6 : 4; |
|
515 |
+ int x, y, i, j, k, x2, y2, x3, y3, plane, shift, mbn; |
|
516 |
+ int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4; |
|
513 | 517 |
int *codebook = v1mode ? info->v1_codebook : info->v4_codebook; |
518 |
+ int size = v1mode ? info->v1_size : info->v4_size; |
|
514 | 519 |
int64_t total_error = 0; |
515 | 520 |
uint8_t vq_pict_buf[(MB_AREA*3)/2]; |
516 | 521 |
AVPicture sub_pict, vq_pict; |
517 | 522 |
|
518 |
- for(i = y = 0; y < h; y += MB_SIZE) { |
|
519 |
- for(x = 0; x < s->w; x += MB_SIZE, i += v1mode ? 1 : 4) { |
|
520 |
- int *base = s->codebook_input + i*entry_size; |
|
523 |
+ for(mbn = i = y = 0; y < h; y += MB_SIZE) { |
|
524 |
+ for(x = 0; x < s->w; x += MB_SIZE, ++mbn) { |
|
525 |
+ int *base; |
|
521 | 526 |
|
527 |
+ if(CERTAIN(encoding)) { |
|
528 |
+// use for the training only the blocks known to be to be encoded [sic:-] |
|
529 |
+ if(s->mb[mbn].best_encoding != encoding) continue; |
|
530 |
+ } |
|
531 |
+ |
|
532 |
+ base = s->codebook_input + i*entry_size; |
|
522 | 533 |
if(v1mode) { |
523 | 534 |
//subsample |
524 | 535 |
for(j = y2 = 0; y2 < entry_size; y2 += 2) { |
... | ... |
@@ -553,8 +775,19 @@ static int quantize(CinepakEncContext *s, int h, AVPicture *pict, int v1mode, in |
553 | 553 |
} |
554 | 554 |
} |
555 | 555 |
} |
556 |
+ i += v1mode ? 1 : 4; |
|
556 | 557 |
} |
557 | 558 |
} |
559 |
+// if(i < mbn*(v1mode ? 1 : 4)) { |
|
560 |
+// av_log(s->avctx, AV_LOG_INFO, "reducing training set for %s from %i to %i (encoding %i)\n", v1mode?"v1":"v4", mbn*(v1mode ? 1 : 4), i, encoding); |
|
561 |
+// } |
|
562 |
+ |
|
563 |
+ if(i == 0) // empty training set, nothing to do |
|
564 |
+ return 0; |
|
565 |
+ if(i < size) { |
|
566 |
+ //av_log(s->avctx, (CERTAIN(encoding) ? AV_LOG_ERROR : AV_LOG_INFO), "WOULD WASTE: %s cbsize %i bigger than training set size %i (encoding %i)\n", v1mode?"v1":"v4", size, i, encoding); |
|
567 |
+ size = i; |
|
568 |
+ } |
|
558 | 569 |
|
559 | 570 |
ff_init_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx); |
560 | 571 |
ff_do_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx); |
... | ... |
@@ -568,8 +801,11 @@ static int quantize(CinepakEncContext *s, int h, AVPicture *pict, int v1mode, in |
568 | 568 |
|
569 | 569 |
//copy indices |
570 | 570 |
for(i = j = y = 0; y < h; y += MB_SIZE) { |
571 |
- for(x = 0; x < s->w; x += MB_SIZE, j++, i += v1mode ? 1 : 4) { |
|
571 |
+ for(x = 0; x < s->w; x += MB_SIZE, j++) { |
|
572 | 572 |
mb_info *mb = &s->mb[j]; |
573 |
+// skip uninteresting blocks if we know their preferred encoding |
|
574 |
+ if(CERTAIN(encoding) && mb->best_encoding != encoding) |
|
575 |
+ continue; |
|
573 | 576 |
|
574 | 577 |
//point sub_pict to current MB |
575 | 578 |
get_sub_picture(s, x, y, pict, &sub_pict); |
... | ... |
@@ -578,26 +814,29 @@ static int quantize(CinepakEncContext *s, int h, AVPicture *pict, int v1mode, in |
578 | 578 |
mb->v1_vector = s->codebook_closest[i]; |
579 | 579 |
|
580 | 580 |
//fill in vq_pict with V1 data |
581 |
- decode_v1_vector(s, &vq_pict, mb, info); |
|
581 |
+ decode_v1_vector(s, &vq_pict, mb->v1_vector, info); |
|
582 | 582 |
|
583 | 583 |
mb->v1_error = compute_mb_distortion(s, &sub_pict, &vq_pict); |
584 | 584 |
total_error += mb->v1_error; |
585 | 585 |
} else { |
586 | 586 |
for(k = 0; k < 4; k++) |
587 |
- mb->v4_vector[v4][k] = s->codebook_closest[i+k]; |
|
587 |
+ mb->v4_vector[k] = s->codebook_closest[i+k]; |
|
588 | 588 |
|
589 | 589 |
//fill in vq_pict with V4 data |
590 |
- decode_v4_vector(s, &vq_pict, mb->v4_vector[v4], info); |
|
590 |
+ decode_v4_vector(s, &vq_pict, mb->v4_vector, info); |
|
591 | 591 |
|
592 |
- mb->v4_error[v4] = compute_mb_distortion(s, &sub_pict, &vq_pict); |
|
593 |
- total_error += mb->v4_error[v4]; |
|
592 |
+ mb->v4_error = compute_mb_distortion(s, &sub_pict, &vq_pict); |
|
593 |
+ total_error += mb->v4_error; |
|
594 | 594 |
} |
595 |
+ i += v1mode ? 1 : 4; |
|
595 | 596 |
} |
596 | 597 |
} |
598 |
+// check that we did it right in the beginning of the function |
|
599 |
+ av_assert0(i >= size); // training set is no smaller than the codebook |
|
597 | 600 |
|
598 |
- //av_log(s->avctx, AV_LOG_INFO, "mode %i size %i i %i error %li\n", v1mode, size, i, total_error); |
|
601 |
+ //av_log(s->avctx, AV_LOG_INFO, "isv1 %i size= %i i= %i error %lli\n", v1mode, size, i, (long long int)total_error); |
|
599 | 602 |
|
600 |
- return 0; |
|
603 |
+ return size; |
|
601 | 604 |
} |
602 | 605 |
|
603 | 606 |
static void calculate_skip_errors(CinepakEncContext *s, int h, AVPicture *last_pict, AVPicture *pict, strip_info *info) |
... | ... |
@@ -617,59 +856,151 @@ static void calculate_skip_errors(CinepakEncContext *s, int h, AVPicture *last_p |
617 | 617 |
|
618 | 618 |
static void write_strip_header(CinepakEncContext *s, int y, int h, int keyframe, unsigned char *buf, int strip_size) |
619 | 619 |
{ |
620 |
- buf[0] = keyframe ? 0x11: 0x10; |
|
620 |
+// actually we are exclusively using intra strip coding (how much can we win |
|
621 |
+// otherwise? how to choose which part of a codebook to update?), |
|
622 |
+// keyframes are different only because we disallow ENC_SKIP on them -- rl |
|
623 |
+// (besides, the logic here used to be inverted: ) |
|
624 |
+// buf[0] = keyframe ? 0x11: 0x10; |
|
625 |
+ buf[0] = keyframe ? 0x10: 0x11; |
|
621 | 626 |
AV_WB24(&buf[1], strip_size + STRIP_HEADER_SIZE); |
622 |
- AV_WB16(&buf[4], y); |
|
627 |
+// AV_WB16(&buf[4], y); /* using absolute y values works -- rl */ |
|
628 |
+ AV_WB16(&buf[4], 0); /* using relative values works as well -- rl */ |
|
623 | 629 |
AV_WB16(&buf[6], 0); |
624 |
- AV_WB16(&buf[8], h); |
|
630 |
+// AV_WB16(&buf[8], y+h); /* using absolute y values works -- rl */ |
|
631 |
+ AV_WB16(&buf[8], h); /* using relative values works as well -- rl */ |
|
625 | 632 |
AV_WB16(&buf[10], s->w); |
633 |
+ //av_log(s->avctx, AV_LOG_INFO, "write_strip_header() %x keyframe=%d\n", buf[0], keyframe); |
|
626 | 634 |
} |
627 | 635 |
|
628 |
-static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe, AVPicture *last_pict, AVPicture *pict, AVPicture *scratch_pict, unsigned char *buf, int64_t *best_score) |
|
636 |
+static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe, AVPicture *last_pict, AVPicture *pict, AVPicture *scratch_pict, unsigned char *buf, int64_t *best_score |
|
637 |
+#ifdef CINEPAK_REPORT_SERR |
|
638 |
+, int64_t *best_serr |
|
639 |
+#endif |
|
640 |
+) |
|
629 | 641 |
{ |
630 | 642 |
int64_t score = 0; |
631 |
- int best_size = 0, v1_size, v4_size, v4, mb_count = s->w * h / MB_AREA; |
|
643 |
+#ifdef CINEPAK_REPORT_SERR |
|
644 |
+ int64_t serr; |
|
645 |
+#endif |
|
646 |
+ int best_size = 0; |
|
632 | 647 |
strip_info info; |
633 |
- CinepakMode best_mode; |
|
634 |
- int v4_codebooks[CODEBOOK_NUM][CODEBOOK_MAX*VECTOR_MAX]; |
|
648 |
+// for codebook optimization: |
|
649 |
+ int v1enough, v1_size, v4enough, v4_size; |
|
650 |
+ int new_v1_size, new_v4_size; |
|
651 |
+ int v1shrunk, v4shrunk; |
|
635 | 652 |
|
636 | 653 |
if(!keyframe) |
637 | 654 |
calculate_skip_errors(s, h, last_pict, pict, &info); |
638 | 655 |
|
639 |
- //precompute V4 codebooks |
|
640 |
- for(v4_size = 1, v4 = 0; v4_size <= 256; v4_size <<= 2, v4++) { |
|
641 |
- info.v4_codebook = v4_codebooks[v4]; |
|
642 |
- quantize(s, h, pict, 0, v4_size, v4, &info); |
|
643 |
- } |
|
644 |
- |
|
645 |
- //try all powers of 4 for the size of the codebooks |
|
646 |
- //constraint the v4 codebook to be no bigger than the v1 codebook |
|
647 |
- for(v1_size = 1; v1_size <= 256; v1_size <<= 2) { |
|
648 |
- //compute V1 codebook |
|
649 |
- quantize(s, h, pict, 1, v1_size, -1, &info); |
|
650 |
- |
|
651 |
- for(v4_size = 0, v4 = -1; v4_size <= v1_size; v4_size = v4_size ? v4_size << 2 : v1_size >= 4 ? v1_size >> 2 : 1, v4++) { |
|
656 |
+ //try some powers of 4 for the size of the codebooks |
|
657 |
+ //constrain the v4 codebook to be no bigger than the v1 one, |
|
658 |
+ //(and no less than v1_size/4) |
|
659 |
+ //thus making v1 preferable and possibly losing small details? should be ok |
|
660 |
+#define SMALLEST_CODEBOOK 1 |
|
661 |
+ for(v1enough = 0, v1_size = SMALLEST_CODEBOOK; v1_size <= CODEBOOK_MAX && !v1enough; v1_size <<= 2) { |
|
662 |
+ for(v4enough = 0, v4_size = 0; v4_size <= v1_size && !v4enough; v4_size = v4_size ? v4_size << 2 : v1_size >= SMALLEST_CODEBOOK << 2 ? v1_size >> 2 : SMALLEST_CODEBOOK) { |
|
652 | 663 |
//try all modes |
653 | 664 |
for(CinepakMode mode = 0; mode < MODE_COUNT; mode++) { |
654 |
- //don't allow MODE_MC in inter frames |
|
665 |
+ //don't allow MODE_MC in intra frames |
|
655 | 666 |
if(keyframe && mode == MODE_MC) |
656 | 667 |
continue; |
657 | 668 |
|
658 |
- //only allow V1-only mode if v4 codebook is empty |
|
659 |
- if(!v4_size && mode != MODE_V1_ONLY) |
|
660 |
- continue; |
|
669 |
+ if(mode == MODE_V1_ONLY) { |
|
670 |
+ info.v1_size = v1_size; |
|
671 |
+// the size may shrink even before optimizations if the input is short: |
|
672 |
+ info.v1_size = quantize(s, h, pict, 1, &info, ENC_UNCERTAIN); |
|
673 |
+ if(info.v1_size < v1_size) |
|
674 |
+// too few eligible blocks, no sense in trying bigger sizes |
|
675 |
+ v1enough = 1; |
|
676 |
+ |
|
677 |
+ info.v4_size = 0; |
|
678 |
+ } else { // mode != MODE_V1_ONLY |
|
679 |
+ // if v4 codebook is empty then only allow V1-only mode |
|
680 |
+ if(!v4_size) |
|
681 |
+ continue; |
|
682 |
+ |
|
683 |
+ if(mode == MODE_V1_V4) { |
|
684 |
+ info.v4_size = v4_size; |
|
685 |
+ info.v4_size = quantize(s, h, pict, 0, &info, ENC_UNCERTAIN); |
|
686 |
+ if(info.v4_size < v4_size) |
|
687 |
+// too few eligible blocks, no sense in trying bigger sizes |
|
688 |
+ v4enough = 1; |
|
689 |
+ } |
|
690 |
+ } |
|
661 | 691 |
|
662 |
- info.v4_codebook = v4 >= 0 ? v4_codebooks[v4] : NULL; |
|
663 |
- score = calculate_mode_score(s, mode, h, v1_size, v4_size, v4, &info); |
|
692 |
+ info.mode = mode; |
|
693 |
+// choose the best encoding per block, based on current experience |
|
694 |
+ score = calculate_mode_score(s, h, &info, 0, |
|
695 |
+ &v1shrunk, &v4shrunk |
|
696 |
+#ifdef CINEPAK_REPORT_SERR |
|
697 |
+, &serr |
|
698 |
+#endif |
|
699 |
+); |
|
700 |
+ |
|
701 |
+ if(mode != MODE_V1_ONLY){ |
|
702 |
+// recompute the codebooks, omitting the extra blocks |
|
703 |
+// we assume we _may_ come here with more blocks to encode than before |
|
704 |
+ info.v1_size = v1_size; |
|
705 |
+ new_v1_size = quantize(s, h, pict, 1, &info, ENC_V1); |
|
706 |
+ if(new_v1_size < info.v1_size){ |
|
707 |
+ //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size); |
|
708 |
+ info.v1_size = new_v1_size; |
|
709 |
+ } |
|
710 |
+// we assume we _may_ come here with more blocks to encode than before |
|
711 |
+ info.v4_size = v4_size; |
|
712 |
+ new_v4_size = quantize(s, h, pict, 0, &info, ENC_V4); |
|
713 |
+ if(new_v4_size < info.v4_size) { |
|
714 |
+ //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries at first iteration\n", mode, v1_size, v4_size, new_v4_size); |
|
715 |
+ info.v4_size = new_v4_size; |
|
716 |
+ } |
|
717 |
+// calculate the resulting score |
|
718 |
+// (do not move blocks to codebook encodings now, as some blocks may have |
|
719 |
+// got bigger errors despite a smaller training set - but we do not |
|
720 |
+// ever grow the training sets back) |
|
721 |
+ for(;;) { |
|
722 |
+ score = calculate_mode_score(s, h, &info, 1, |
|
723 |
+ &v1shrunk, &v4shrunk |
|
724 |
+#ifdef CINEPAK_REPORT_SERR |
|
725 |
+, &serr |
|
726 |
+#endif |
|
727 |
+); |
|
728 |
+// do we have a reason to reiterate? |
|
729 |
+ if(!v1shrunk && !v4shrunk) break; |
|
730 |
+// recompute the codebooks, omitting the extra blocks |
|
731 |
+ if(v1shrunk) { |
|
732 |
+ info.v1_size = v1_size; |
|
733 |
+ new_v1_size = quantize(s, h, pict, 1, &info, ENC_V1); |
|
734 |
+ if(new_v1_size < info.v1_size){ |
|
735 |
+ //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size); |
|
736 |
+ info.v1_size = new_v1_size; |
|
737 |
+ } |
|
738 |
+ } |
|
739 |
+ if(v4shrunk) { |
|
740 |
+ info.v4_size = v4_size; |
|
741 |
+ new_v4_size = quantize(s, h, pict, 0, &info, ENC_V4); |
|
742 |
+ if(new_v4_size < info.v4_size) { |
|
743 |
+ //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries\n", mode, v1_size, v4_size, new_v4_size); |
|
744 |
+ info.v4_size = new_v4_size; |
|
745 |
+ } |
|
746 |
+ } |
|
747 |
+ } |
|
748 |
+ } |
|
664 | 749 |
|
665 |
- //av_log(s->avctx, AV_LOG_INFO, "%3i %3i score = %li\n", v1_size, v4_size, score); |
|
750 |
+ //av_log(s->avctx, AV_LOG_INFO, "%3i %3i score = %lli\n", v1_size, v4_size, (long long int)score); |
|
666 | 751 |
|
667 | 752 |
if(best_size == 0 || score < *best_score) { |
753 |
+ |
|
668 | 754 |
*best_score = score; |
669 |
- best_size = encode_mode(s, mode, h, v1_size, v4_size, v4, scratch_pict, &info, s->strip_buf + STRIP_HEADER_SIZE); |
|
670 |
- best_mode = mode; |
|
755 |
+#ifdef CINEPAK_REPORT_SERR |
|
756 |
+ *best_serr = serr; |
|
757 |
+#endif |
|
758 |
+ best_size = encode_mode(s, h, scratch_pict, last_pict, &info, s->strip_buf + STRIP_HEADER_SIZE); |
|
671 | 759 |
|
672 |
- av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18li %i B\n", mode, v1_size, v4_size, score, best_size); |
|
760 |
+ //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18lli %i B", mode, info.v1_size, info.v4_size, (long long int)score, best_size); |
|
761 |
+ //av_log(s->avctx, AV_LOG_INFO, "\n"); |
|
762 |
+#ifdef CINEPAK_REPORT_SERR |
|
763 |
+ av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18lli %i B\n", mode, v1_size, v4_size, (long long int)serr, best_size); |
|
764 |
+#endif |
|
673 | 765 |
|
674 | 766 |
#ifdef CINEPAKENC_DEBUG |
675 | 767 |
//save MB encoding choices |
... | ... |
@@ -678,6 +1009,7 @@ static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe, AVPicture |
678 | 678 |
|
679 | 679 |
//memcpy(strip_temp + STRIP_HEADER_SIZE, strip_temp, best_size); |
680 | 680 |
write_strip_header(s, y, h, keyframe, s->strip_buf, best_size); |
681 |
+ |
|
681 | 682 |
} |
682 | 683 |
} |
683 | 684 |
} |
... | ... |
@@ -685,11 +1017,11 @@ static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe, AVPicture |
685 | 685 |
|
686 | 686 |
#ifdef CINEPAKENC_DEBUG |
687 | 687 |
//gather stats. this will only work properly if MAX_STRIPS == 1 |
688 |
- if(best_mode == MODE_V1_ONLY) { |
|
688 |
+ if(best_info.mode == MODE_V1_ONLY) { |
|
689 | 689 |
s->num_v1_mode++; |
690 | 690 |
s->num_v1_encs += s->w*h/MB_AREA; |
691 | 691 |
} else { |
692 |
- if(best_mode == MODE_V1_V4) |
|
692 |
+ if(best_info.mode == MODE_V1_V4) |
|
693 | 693 |
s->num_v4_mode++; |
694 | 694 |
else |
695 | 695 |
s->num_mc_mode++; |
... | ... |
@@ -708,13 +1040,12 @@ static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe, AVPicture |
708 | 708 |
best_size += STRIP_HEADER_SIZE; |
709 | 709 |
memcpy(buf, s->strip_buf, best_size); |
710 | 710 |
|
711 |
- |
|
712 | 711 |
return best_size; |
713 | 712 |
} |
714 | 713 |
|
715 |
-static int write_cvid_header(CinepakEncContext *s, unsigned char *buf, int num_strips, int data_size) |
|
714 |
+static int write_cvid_header(CinepakEncContext *s, unsigned char *buf, int num_strips, int data_size, int isakeyframe) |
|
716 | 715 |
{ |
717 |
- buf[0] = 0; |
|
716 |
+ buf[0] = isakeyframe ? 0 : 1; |
|
718 | 717 |
AV_WB24(&buf[1], data_size + CVID_HEADER_SIZE); |
719 | 718 |
AV_WB16(&buf[4], s->w); |
720 | 719 |
AV_WB16(&buf[6], s->h); |
... | ... |
@@ -723,67 +1054,175 @@ static int write_cvid_header(CinepakEncContext *s, unsigned char *buf, int num_s |
723 | 723 |
return CVID_HEADER_SIZE; |
724 | 724 |
} |
725 | 725 |
|
726 |
-static int rd_frame(CinepakEncContext *s, AVFrame *frame, unsigned char *buf, int buf_size) |
|
726 |
+static int rd_frame(CinepakEncContext *s, AVFrame *frame, int isakeyframe, unsigned char *buf, int buf_size) |
|
727 | 727 |
{ |
728 |
- int num_strips, strip, h, i, y, size, temp_size, best_size; |
|
728 |
+ int num_strips, strip, i, y, nexty, size, temp_size, best_size; |
|
729 | 729 |
AVPicture last_pict, pict, scratch_pict; |
730 | 730 |
int64_t best_score = 0, score, score_temp; |
731 |
+#ifdef CINEPAK_REPORT_SERR |
|
732 |
+ int64_t best_serr = 0, serr, serr_temp; |
|
733 |
+#endif |
|
731 | 734 |
|
732 |
- //TODO: support encoding zero strips (meaning skip the whole frame) |
|
733 |
- for(num_strips = MIN_STRIPS; num_strips <= MAX_STRIPS && num_strips <= s->h / MB_SIZE; num_strips++) { |
|
735 |
+ int best_nstrips; |
|
736 |
+ |
|
737 |
+ if(s->pix_fmt == AV_PIX_FMT_RGB24) { |
|
738 |
+ int x; |
|
739 |
+// build a copy of the given frame in the correct colorspace |
|
740 |
+ for(y = 0; y < s->h; y += 2) { |
|
741 |
+ for(x = 0; x < s->w; x += 2) { |
|
742 |
+ uint8_t *ir[2]; int32_t r, g, b, rr, gg, bb; |
|
743 |
+ ir[0] = ((AVPicture*)frame)->data[0] + x*3 + y*((AVPicture*)frame)->linesize[0]; |
|
744 |
+ ir[1] = ir[0] + ((AVPicture*)frame)->linesize[0]; |
|
745 |
+ get_sub_picture(s, x, y, (AVPicture*)s->input_frame, &scratch_pict); |
|
746 |
+ r = g = b = 0; |
|
747 |
+ for(i=0; i<4; ++i) { |
|
748 |
+ int i1, i2; |
|
749 |
+ i1 = (i&1); i2 = (i>=2); |
|
750 |
+ rr = ir[i2][i1*3+0]; |
|
751 |
+ gg = ir[i2][i1*3+1]; |
|
752 |
+ bb = ir[i2][i1*3+2]; |
|
753 |
+ r += rr; g += gg; b += bb; |
|
754 |
+// using fixed point arithmetic for portable repeatability, scaling by 2^23 |
|
755 |
+// "Y" |
|
756 |
+// rr = 0.2857*rr + 0.5714*gg + 0.1429*bb; |
|
757 |
+ rr = (2396625*rr + 4793251*gg + 1198732*bb) >> 23; |
|
758 |
+ if( rr < 0) rr = 0; |
|
759 |
+ else if (rr > 255) rr = 255; |
|
760 |
+ scratch_pict.data[0][i1 + i2*scratch_pict.linesize[0]] = rr; |
|
761 |
+ } |
|
762 |
+// let us scale down as late as possible |
|
763 |
+// r /= 4; g /= 4; b /= 4; |
|
764 |
+// "U" |
|
765 |
+// rr = -0.1429*r - 0.2857*g + 0.4286*b; |
|
766 |
+ rr = (-299683*r - 599156*g + 898839*b) >> 23; |
|
767 |
+ if( rr < -128) rr = -128; |
|
768 |
+ else if (rr > 127) rr = 127; |
|
769 |
+ scratch_pict.data[1][0] = rr + 128; // quantize needs unsigned |
|
770 |
+// "V" |
|
771 |
+// rr = 0.3571*r - 0.2857*g - 0.0714*b; |
|
772 |
+ rr = (748893*r - 599156*g - 149737*b) >> 23; |
|
773 |
+ if( rr < -128) rr = -128; |
|
774 |
+ else if (rr > 127) rr = 127; |
|
775 |
+ scratch_pict.data[2][0] = rr + 128; // quantize needs unsigned |
|
776 |
+ } |
|
777 |
+ } |
|
778 |
+ } |
|
779 |
+ |
|
780 |
+ //would be nice but quite certainly incompatible with vintage players: |
|
781 |
+ // support encoding zero strips (meaning skip the whole frame) |
|
782 |
+ for(num_strips = s->min_strips; num_strips <= s->max_strips && num_strips <= s->h / MB_SIZE; num_strips++) { |
|
734 | 783 |
score = 0; |
735 | 784 |
size = 0; |
736 |
- h = s->h / num_strips; |
|
737 |
- //make h into next multiple of 4 |
|
738 |
- h += 4 - (h & 3); |
|
785 |
+#ifdef CINEPAK_REPORT_SERR |
|
786 |
+ serr = 0; |
|
787 |
+#endif |
|
788 |
+ |
|
789 |
+ for(y = 0, strip = 1; y < s->h; strip++, y = nexty) { |
|
790 |
+ int strip_height; |
|
739 | 791 |
|
740 |
- for(strip = 0; strip < num_strips; strip++) { |
|
741 |
- y = strip*h; |
|
792 |
+ nexty = strip * s->h / num_strips; // <= s->h |
|
793 |
+ //make nexty the next multiple of 4 if not already there |
|
794 |
+ if(nexty & 3) |
|
795 |
+ nexty += 4 - (nexty & 3); |
|
742 | 796 |
|
743 |
- get_sub_picture(s, 0, y, (AVPicture*)frame, &pict); |
|
744 |
- get_sub_picture(s, 0, y, (AVPicture*)&s->last_frame, &last_pict); |
|
745 |
- get_sub_picture(s, 0, y, (AVPicture*)&s->scratch_frame, &scratch_pict); |
|
797 |
+ strip_height = nexty - y; |
|
798 |
+ if(strip_height <= 0) { // can this ever happen? |
|
799 |
+ av_log(s->avctx, AV_LOG_INFO, "skipping zero height strip %i of %i\n", strip, num_strips); |
|
800 |
+ continue; |
|
801 |
+ } |
|
802 |
+ |
|
803 |
+ if(s->pix_fmt == AV_PIX_FMT_RGB24) |
|
804 |
+ get_sub_picture(s, 0, y, (AVPicture*)s->input_frame, &pict); |
|
805 |
+ else |
|
806 |
+ get_sub_picture(s, 0, y, (AVPicture*)frame, &pict); |
|
807 |
+ get_sub_picture(s, 0, y, (AVPicture*)s->last_frame, &last_pict); |
|
808 |
+ get_sub_picture(s, 0, y, (AVPicture*)s->scratch_frame, &scratch_pict); |
|
746 | 809 |
|
747 |
- if((temp_size = rd_strip(s, y, FFMIN(h, s->h - y), frame->key_frame, &last_pict, &pict, &scratch_pict, s->frame_buf + CVID_HEADER_SIZE, &score_temp)) < 0) |
|
810 |
+ if((temp_size = rd_strip(s, y, strip_height, isakeyframe, &last_pict, &pict, &scratch_pict, s->frame_buf + size + CVID_HEADER_SIZE, &score_temp |
|
811 |
+#ifdef CINEPAK_REPORT_SERR |
|
812 |
+, &serr_temp |
|
813 |
+#endif |
|
814 |
+)) < 0) |
|
748 | 815 |
return temp_size; |
749 | 816 |
|
750 | 817 |
score += score_temp; |
818 |
+#ifdef CINEPAK_REPORT_SERR |
|
819 |
+ serr += serr_temp; |
|
820 |
+#endif |
|
751 | 821 |
size += temp_size; |
822 |
+ //av_log(s->avctx, AV_LOG_INFO, "strip %d, isakeyframe=%d", strip, isakeyframe); |
|
823 |
+ //av_log(s->avctx, AV_LOG_INFO, "\n"); |
|
752 | 824 |
} |
753 | 825 |
|
754 | 826 |
if(best_score == 0 || score < best_score) { |
755 | 827 |
best_score = score; |
756 |
- best_size = size + write_cvid_header(s, s->frame_buf, num_strips, size); |
|
757 |
- av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12li, %i B\n", num_strips, score, best_size); |
|
828 |
+#ifdef CINEPAK_REPORT_SERR |
|
829 |
+ best_serr = serr; |
|
830 |
+#endif |
|
831 |
+ best_size = size + write_cvid_header(s, s->frame_buf, num_strips, size, isakeyframe); |
|
832 |
+ //av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12lli, %i B\n", num_strips, (long long int)score, best_size); |
|
833 |
+#ifdef CINEPAK_REPORT_SERR |
|
834 |
+ av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12lli, %i B\n", num_strips, (long long int)serr, best_size); |
|
835 |
+#endif |
|
758 | 836 |
|
759 |
- FFSWAP(AVFrame, s->best_frame, s->scratch_frame); |
|
837 |
+ FFSWAP(AVFrame *, s->best_frame, s->scratch_frame); |
|
838 |
+ memcpy(buf, s->frame_buf, best_size); |
|
839 |
+ best_nstrips = num_strips; |
|
760 | 840 |
} |
841 |
+// avoid trying too many strip numbers without a real reason |
|
842 |
+// (this makes the processing of the very first frame faster) |
|
843 |
+ if(num_strips - best_nstrips > 4) |
|
844 |
+ break; |
|
761 | 845 |
} |
762 | 846 |
|
763 |
- memcpy(buf, s->frame_buf, best_size); |
|
847 |
+// let the number of strips slowly adapt to the changes in the contents, |
|
848 |
+// compared to full bruteforcing every time this will occasionally lead |
|
849 |
+// to some r/d performance loss but makes encoding up to several times faster |
|
850 |
+#ifdef CINEPAK_AGGRESSIVE_STRIP_NUMBER_ADAPTIVITY |
|
851 |
+ s->max_strips = best_nstrips + 4; |
|
852 |
+ if(s->max_strips >= MAX_STRIPS) |
|
853 |
+ s->max_strips = MAX_STRIPS; |
|
854 |
+ s->min_strips = best_nstrips - 4; |
|
855 |
+ if(s->min_strips < MIN_STRIPS) |
|
856 |
+ s->min_strips = MIN_STRIPS; |
|
857 |
+#else |
|
858 |
+ if(best_nstrips == s->max_strips) { // let us try to step up |
|
859 |
+ s->max_strips = best_nstrips + 1; |
|
860 |
+ if(s->max_strips >= MAX_STRIPS) |
|
861 |
+ s->max_strips = MAX_STRIPS; |
|
862 |
+ } else { // try to step down |
|
863 |
+ s->max_strips = best_nstrips; |
|
864 |
+ } |
|
865 |
+ s->min_strips = s->max_strips - 1; |
|
866 |
+ if(s->min_strips < MIN_STRIPS) |
|
867 |
+ s->min_strips = MIN_STRIPS; |
|
868 |
+#endif |
|
764 | 869 |
|
765 | 870 |
return best_size; |
766 | 871 |
} |
767 | 872 |
|
768 |
-static int cinepak_encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data) |
|
873 |
+static int cinepak_encode_frame(AVCodecContext *avctx, AVPacket *pkt, |
|
874 |
+ const AVFrame *frame, int *got_packet) |
|
769 | 875 |
{ |
770 | 876 |
CinepakEncContext *s = avctx->priv_data; |
771 |
- AVFrame *frame = data; |
|
772 | 877 |
int ret; |
773 | 878 |
|
774 | 879 |
s->lambda = frame->quality ? frame->quality - 1 : 2 * FF_LAMBDA_SCALE; |
775 | 880 |
|
776 |
- frame->key_frame = s->curframe == 0; |
|
777 |
- frame->pict_type = frame->key_frame ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P; |
|
881 |
+ if ((ret = ff_alloc_packet(pkt, s->frame_buf_size)) < 0) |
|
882 |
+ return ret; |
|
883 |
+ ret = rd_frame(s, frame, (s->curframe == 0), pkt->data, s->frame_buf_size); |
|
884 |
+ pkt->size = ret; |
|
885 |
+ if (s->curframe == 0) |
|
886 |
+ pkt->flags |= AV_PKT_FLAG_KEY; |
|
887 |
+ *got_packet = 1; |
|
778 | 888 |
|
779 |
- ret = rd_frame(s, frame, buf, buf_size); |
|
780 |
- |
|
781 |
- FFSWAP(AVFrame, s->last_frame, s->best_frame); |
|
889 |
+ FFSWAP(AVFrame *, s->last_frame, s->best_frame); |
|
782 | 890 |
|
783 | 891 |
if (++s->curframe >= s->keyint) |
784 | 892 |
s->curframe = 0; |
785 | 893 |
|
786 |
- return ret; |
|
894 |
+ return 0; |
|
787 | 895 |
} |
788 | 896 |
|
789 | 897 |
static av_cold int cinepak_encode_end(AVCodecContext *avctx) |
... | ... |
@@ -791,20 +1230,27 @@ static av_cold int cinepak_encode_end(AVCodecContext *avctx) |
791 | 791 |
CinepakEncContext *s = avctx->priv_data; |
792 | 792 |
int x; |
793 | 793 |
|
794 |
- av_free(s->codebook_input); |
|
795 |
- av_free(s->codebook_closest); |
|
796 |
- av_free(s->strip_buf); |
|
797 |
- av_free(s->frame_buf); |
|
798 |
- av_free(s->mb); |
|
794 |
+ av_frame_free(&s->last_frame); |
|
795 |
+ av_frame_free(&s->best_frame); |
|
796 |
+ av_frame_free(&s->scratch_frame); |
|
797 |
+ if (avctx->pix_fmt == AV_PIX_FMT_RGB24) |
|
798 |
+ av_frame_free(&s->input_frame); |
|
799 |
+ av_freep(&s->codebook_input); |
|
800 |
+ av_freep(&s->codebook_closest); |
|
801 |
+ av_freep(&s->strip_buf); |
|
802 |
+ av_freep(&s->frame_buf); |
|
803 |
+ av_freep(&s->mb); |
|
799 | 804 |
#ifdef CINEPAKENC_DEBUG |
800 |
- av_free(s->best_mb); |
|
805 |
+ av_freep(&s->best_mb); |
|
801 | 806 |
#endif |
802 | 807 |
|
803 |
- for(x = 0; x < 3; x++) |
|
804 |
- av_free(s->pict_bufs[x]); |
|
808 |
+ for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++) |
|
809 |
+ av_freep(&s->pict_bufs[x]); |
|
805 | 810 |
|
811 |
+#ifdef CINEPAKENC_DEBUG |
|
806 | 812 |
av_log(avctx, AV_LOG_INFO, "strip coding stats: %i V1 mode, %i V4 mode, %i MC mode (%i V1 encs, %i V4 encs, %i skips)\n", |
807 | 813 |
s->num_v1_mode, s->num_v4_mode, s->num_mc_mode, s->num_v1_encs, s->num_v4_encs, s->num_skips); |
814 |
+#endif |
|
808 | 815 |
|
809 | 816 |
return 0; |
810 | 817 |
} |