Browse code

cinepakenc: fixes and improvements

version 2013-02-08 Rl
- fixes/optimization in multistrip encoding and codebook size choice,
quality/bitrate is now better than that of the binary proprietary encoder

version 2013-02-12 Rl
- separated codebook training sets, avoided the transfer of wasted bytes,
which yields both better quality and smaller files
- now using the correct colorspace (TODO: move conversion to libswscale)

version 2013-02-14 Rl "Valentine's Day" version:
- made strip division more robust
- minimized bruteforcing the number of strips,
(costs some R/D but speeds up compression a lot), the heuristic
assumption is that score as a function of the number of strips has
one wide minimum which moves slowly, of course not fully true
- simplified codebook generation,
the old code was meant for other optimizations than we actually do
- optimized the codebook generation / error estimation for MODE_MC

version 2013-04-28 Rl
- bugfixed codebook optimization logic

version 2014-01-20 Rl
- made the encoder compatible with vintage decoders
and added some yet unused code for possible future
incremental codebook updates
- fixed a small memory leak

version 2014-01-21 Rl
- believe it or not, now we get even smaller files, with better quality
(which means I missed an optimization earlier :)

Signed-off-by: Diego Biurrun <diego@biurrun.de>

addr-see-the-website@aetey.se authored on 2017/06/28 07:27:12
Showing 1 changed files
... ...
@@ -1,6 +1,9 @@
1 1
 /*
2 2
  * Cinepak encoder (c) 2011 Tomas Härdin
3 3
  * http://titan.codemill.se/~tomhar/cinepakenc.patch
4
+ *
5
+ * Fixes and improvements, vintage decoders compatibility
6
+ *  (c) 2013, 2014 Rl, Aetey Global Technologies AB
4 7
 
5 8
 Permission is hereby granted, free of charge, to any person obtaining a
6 9
 copy of this software and associated documentation files (the "Software"),
... ...
@@ -20,13 +23,27 @@ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 20
 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 21
 OTHER DEALINGS IN THE SOFTWARE.
22 22
 
23
+ * TODO:
24
+ * - optimize: color space conversion (move conversion to libswscale), ...
25
+ * - implement options to set the min/max number of strips?
26
+ * MAYBE:
27
+ * - "optimally" split the frame into several non-regular areas
28
+ *   using a separate codebook pair for each area and approximating
29
+ *   the area by several rectangular strips (generally not full width ones)
30
+ *   (use quadtree splitting? a simple fixed-granularity grid?)
23 31
  */
24 32
 
33
+#include <string.h>
34
+
25 35
 #include "libavutil/common.h"
36
+#include "libavutil/internal.h"
26 37
 #include "libavutil/intreadwrite.h"
27 38
 #include "avcodec.h"
28 39
 #include "libavutil/lfg.h"
29 40
 #include "elbg.h"
41
+#include "internal.h"
42
+
43
+#include "libavutil/avassert.h"
30 44
 
31 45
 #define CVID_HEADER_SIZE 10
32 46
 #define STRIP_HEADER_SIZE 12
... ...
@@ -36,11 +53,20 @@ OTHER DEALINGS IN THE SOFTWARE.
36 36
 #define MB_AREA (MB_SIZE*MB_SIZE)
37 37
 
38 38
 #define VECTOR_MAX 6        //six or four entries per vector depending on format
39
-#define CODEBOOK_MAX 256
40
-#define CODEBOOK_NUM 5      //five potential codebooks (1, 4, 16, 64, 256) for V1 and V4
39
+#define CODEBOOK_MAX 256    //size of a codebook
41 40
 
42
-#define MAX_STRIPS  1       //Note: having fewer choices regarding the number of strip speeds up encoding (obviously)
41
+//#define MAX_STRIPS  32      //Note: having fewer choices regarding the number of strips speeds up encoding (obviously)
42
+#define MAX_STRIPS  3       // This seems to be max for vintage players! -- rl
43
+// TODO: we might want to have a "vintage compatibility" switch
43 44
 #define MIN_STRIPS  1       //Note: having more strips speeds up encoding the frame (this is less obvious)
45
+// MAX_STRIPS limits the maximum quality you can reach
46
+//            when you want high quality on high resolutions,
47
+// MIN_STRIPS limits the minimum efficiently encodable bit rate
48
+//            on low resolutions
49
+// the numbers are only used for brute force optimization for the first frame,
50
+// for the following frames they are adaptively readjusted
51
+// NOTE the decoder in ffmpeg has its own arbitrary limitation on the number
52
+// of strips, currently 32
44 53
 
45 54
 typedef enum {
46 55
     MODE_V1_ONLY = 0,
... ...
@@ -53,42 +79,51 @@ typedef enum {
53 53
 typedef enum {
54 54
     ENC_V1,
55 55
     ENC_V4,
56
-    ENC_SKIP
56
+    ENC_SKIP,
57
+
58
+    ENC_UNCERTAIN
57 59
 } mb_encoding;
58 60
 
59 61
 typedef struct {
60 62
     int v1_vector;                  //index into v1 codebook
61 63
     int v1_error;                   //error when using V1 encoding
62
-    int v4_vector[CODEBOOK_NUM][4]; //indices into v4 codebooks
63
-    int v4_error[CODEBOOK_NUM];     //error when using V4 encodings
64
+    int v4_vector[4];               //indices into v4 codebook
65
+    int v4_error;                   //error when using V4 encoding
64 66
     int skip_error;                 //error when block is skipped (aka copied from last frame)
65 67
     mb_encoding best_encoding;      //last result from calculate_mode_score()
66 68
 } mb_info;
67 69
 
68 70
 typedef struct {
69 71
     int v1_codebook[CODEBOOK_MAX*VECTOR_MAX];
70
-    int *v4_codebook;
72
+    int v4_codebook[CODEBOOK_MAX*VECTOR_MAX];
73
+    int v1_size;
74
+    int v4_size;
75
+    CinepakMode mode;
71 76
 } strip_info;
72 77
 
73 78
 typedef struct {
74 79
     AVCodecContext *avctx;
75
-    unsigned char *pict_bufs[3], *strip_buf, *frame_buf;
76
-    AVFrame last_frame;
77
-    AVFrame best_frame;
78
-    AVFrame scratch_frame;
80
+    unsigned char *pict_bufs[4], *strip_buf, *frame_buf;
81
+    AVFrame *last_frame;
82
+    AVFrame *best_frame;
83
+    AVFrame *scratch_frame;
84
+    AVFrame *input_frame;
79 85
     enum AVPixelFormat pix_fmt;
80 86
     int w, h;
87
+    int frame_buf_size;
81 88
     int curframe, keyint;
82 89
     AVLFG randctx;
83 90
     uint64_t lambda;
84 91
     int *codebook_input;
85 92
     int *codebook_closest;
86 93
     mb_info *mb;                                //MB RD state
94
+    int min_strips;          //the current limit
95
+    int max_strips;          //the current limit
87 96
 #ifdef CINEPAKENC_DEBUG
88 97
     mb_info *best_mb;                           //TODO: remove. only used for printing stats
89
-#endif
90 98
     int num_v1_mode, num_v4_mode, num_mc_mode;
91 99
     int num_v1_encs, num_v4_encs, num_skips;
100
+#endif
92 101
 } CinepakEncContext;
93 102
 
94 103
 static av_cold int cinepak_encode_init(AVCodecContext *avctx)
... ...
@@ -102,20 +137,33 @@ static av_cold int cinepak_encode_init(AVCodecContext *avctx)
102 102
         return AVERROR(EINVAL);
103 103
     }
104 104
 
105
-    if (!(s->codebook_input = av_malloc(sizeof(int) * (avctx->pix_fmt == AV_PIX_FMT_YUV420P ? 6 : 4) * (avctx->width * avctx->height) >> 2)))
105
+    if (!(s->last_frame = av_frame_alloc()))
106 106
         return AVERROR(ENOMEM);
107
+    if (!(s->best_frame = av_frame_alloc()))
108
+        goto enomem;
109
+    if (!(s->scratch_frame = av_frame_alloc()))
110
+        goto enomem;
111
+    if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
112
+        if (!(s->input_frame = av_frame_alloc()))
113
+            goto enomem;
114
+
115
+    if (!(s->codebook_input = av_malloc(sizeof(int) * (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4) * (avctx->width * avctx->height) >> 2)))
116
+        goto enomem;
107 117
 
108 118
     if (!(s->codebook_closest = av_malloc(sizeof(int) * (avctx->width * avctx->height) >> 2)))
109 119
         goto enomem;
110 120
 
111
-    for(x = 0; x < 3; x++)
112
-        if(!(s->pict_bufs[x] = av_malloc((avctx->pix_fmt == AV_PIX_FMT_YUV420P ? 6 : 4) * (avctx->width * avctx->height) >> 2)))
121
+    for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
122
+        if(!(s->pict_bufs[x] = av_malloc((avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4) * (avctx->width * avctx->height) >> 2)))
113 123
             goto enomem;
114 124
 
115 125
     mb_count = avctx->width * avctx->height / MB_AREA;
116 126
 
117
-    //the largest possible chunk is 0x31 with all MBs encoded in V4 mode, which is 34 bits per MB
118
-    strip_buf_size = STRIP_HEADER_SIZE + 3 * CHUNK_HEADER_SIZE + 2 * VECTOR_MAX * CODEBOOK_MAX + 4 * (mb_count + (mb_count + 15) / 16);
127
+    //the largest possible chunk is 0x31 with all MBs encoded in V4 mode
128
+    //and full codebooks being replaced in INTER mode,
129
+    // which is 34 bits per MB
130
+    //and 2*256 extra flag bits per strip
131
+    strip_buf_size = STRIP_HEADER_SIZE + 3 * CHUNK_HEADER_SIZE + 2 * VECTOR_MAX * CODEBOOK_MAX + 4 * (mb_count + (mb_count + 15) / 16) + (2 * CODEBOOK_MAX)/8;
119 132
 
120 133
     frame_buf_size = CVID_HEADER_SIZE + MAX_STRIPS * strip_buf_size;
121 134
 
... ...
@@ -137,113 +185,221 @@ static av_cold int cinepak_encode_init(AVCodecContext *avctx)
137 137
     s->avctx = avctx;
138 138
     s->w = avctx->width;
139 139
     s->h = avctx->height;
140
+    s->frame_buf_size = frame_buf_size;
140 141
     s->curframe = 0;
141 142
     s->keyint = avctx->keyint_min;
142 143
     s->pix_fmt = avctx->pix_fmt;
143 144
 
144 145
     //set up AVFrames
145
-    s->last_frame.data[0]        = s->pict_bufs[0];
146
-    s->last_frame.linesize[0]    = s->w;
147
-    s->best_frame.data[0]        = s->pict_bufs[1];
148
-    s->best_frame.linesize[0]    = s->w;
149
-    s->scratch_frame.data[0]     = s->pict_bufs[2];
150
-    s->scratch_frame.linesize[0] = s->w;
151
-
152
-    if(s->pix_fmt == AV_PIX_FMT_YUV420P) {
153
-        s->last_frame.data[1]        = s->last_frame.data[0] + s->w * s->h;
154
-        s->last_frame.data[2]        = s->last_frame.data[1] + ((s->w * s->h) >> 2);
155
-        s->last_frame.linesize[1]    = s->last_frame.linesize[2] = s->w >> 1;
156
-
157
-        s->best_frame.data[1]        = s->best_frame.data[0] + s->w * s->h;
158
-        s->best_frame.data[2]        = s->best_frame.data[1] + ((s->w * s->h) >> 2);
159
-        s->best_frame.linesize[1]    = s->best_frame.linesize[2] = s->w >> 1;
160
-
161
-        s->scratch_frame.data[1]     = s->scratch_frame.data[0] + s->w * s->h;
162
-        s->scratch_frame.data[2]     = s->scratch_frame.data[1] + ((s->w * s->h) >> 2);
163
-        s->scratch_frame.linesize[1] = s->scratch_frame.linesize[2] = s->w >> 1;
146
+    s->last_frame->data[0]        = s->pict_bufs[0];
147
+    s->last_frame->linesize[0]    = s->w;
148
+    s->best_frame->data[0]        = s->pict_bufs[1];
149
+    s->best_frame->linesize[0]    = s->w;
150
+    s->scratch_frame->data[0]     = s->pict_bufs[2];
151
+    s->scratch_frame->linesize[0] = s->w;
152
+
153
+    if (s->pix_fmt == AV_PIX_FMT_RGB24) {
154
+        s->last_frame->data[1]        = s->last_frame->data[0] + s->w * s->h;
155
+        s->last_frame->data[2]        = s->last_frame->data[1] + ((s->w * s->h) >> 2);
156
+        s->last_frame->linesize[1]    = s->last_frame->linesize[2] = s->w >> 1;
157
+
158
+        s->best_frame->data[1]        = s->best_frame->data[0] + s->w * s->h;
159
+        s->best_frame->data[2]        = s->best_frame->data[1] + ((s->w * s->h) >> 2);
160
+        s->best_frame->linesize[1]    = s->best_frame->linesize[2] = s->w >> 1;
161
+
162
+        s->scratch_frame->data[1]     = s->scratch_frame->data[0] + s->w * s->h;
163
+        s->scratch_frame->data[2]     = s->scratch_frame->data[1] + ((s->w * s->h) >> 2);
164
+        s->scratch_frame->linesize[1] = s->scratch_frame->linesize[2] = s->w >> 1;
165
+
166
+        s->input_frame->data[0]       = s->pict_bufs[3];
167
+        s->input_frame->linesize[0]   = s->w;
168
+        s->input_frame->data[1]       = s->input_frame->data[0] + s->w * s->h;
169
+        s->input_frame->data[2]       = s->input_frame->data[1] + ((s->w * s->h) >> 2);
170
+        s->input_frame->linesize[1]   = s->input_frame->linesize[2] = s->w >> 1;
164 171
     }
165 172
 
173
+    s->min_strips = MIN_STRIPS;
174
+    s->max_strips = MAX_STRIPS;
175
+
176
+#ifdef CINEPAKENC_DEBUG
166 177
     s->num_v1_mode = s->num_v4_mode = s->num_mc_mode = s->num_v1_encs = s->num_v4_encs = s->num_skips = 0;
178
+#endif
167 179
 
168 180
     return 0;
169 181
 
170 182
 enomem:
171
-    av_free(s->codebook_input);
172
-    av_free(s->codebook_closest);
173
-    av_free(s->strip_buf);
174
-    av_free(s->frame_buf);
175
-    av_free(s->mb);
183
+    av_frame_free(&s->last_frame);
184
+    av_frame_free(&s->best_frame);
185
+    av_frame_free(&s->scratch_frame);
186
+    if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
187
+        av_frame_free(&s->input_frame);
188
+    av_freep(&s->codebook_input);
189
+    av_freep(&s->codebook_closest);
190
+    av_freep(&s->strip_buf);
191
+    av_freep(&s->frame_buf);
192
+    av_freep(&s->mb);
176 193
 #ifdef CINEPAKENC_DEBUG
177
-    av_free(s->best_mb);
194
+    av_freep(&s->best_mb);
178 195
 #endif
179 196
 
180
-    for(x = 0; x < 3; x++)
181
-        av_free(s->pict_bufs[x]);
197
+    for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
198
+        av_freep(&s->pict_bufs[x]);
182 199
 
183 200
     return AVERROR(ENOMEM);
184 201
 }
185 202
 
186
-static int64_t calculate_mode_score(CinepakEncContext *s, CinepakMode mode, int h, int v1_size, int v4_size, int v4, strip_info *info)
203
+static int64_t calculate_mode_score(CinepakEncContext *s, int h, strip_info *info, int report, int *training_set_v1_shrunk, int *training_set_v4_shrunk
204
+#ifdef CINEPAK_REPORT_SERR
205
+, int64_t *serr
206
+#endif
207
+)
187 208
 {
188 209
     //score = FF_LAMBDA_SCALE * error + lambda * bits
189 210
     int x;
190
-    int entry_size = s->pix_fmt == AV_PIX_FMT_YUV420P ? 6 : 4;
211
+    int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
191 212
     int mb_count = s->w * h / MB_AREA;
192 213
     mb_info *mb;
193 214
     int64_t score1, score2, score3;
194
-    int64_t ret = s->lambda * ((v1_size ? CHUNK_HEADER_SIZE + v1_size * entry_size : 0) +
195
-                   (v4_size ? CHUNK_HEADER_SIZE + v4_size * entry_size : 0) +
215
+    int64_t ret = s->lambda * ((info->v1_size ? CHUNK_HEADER_SIZE + info->v1_size * entry_size : 0) +
216
+                   (info->v4_size ? CHUNK_HEADER_SIZE + info->v4_size * entry_size : 0) +
196 217
                    CHUNK_HEADER_SIZE) << 3;
197 218
 
198
-    //av_log(s->avctx, AV_LOG_INFO, "sizes %3i %3i -> %9li score mb_count %i", v1_size, v4_size, ret, mb_count);
219
+    //av_log(s->avctx, AV_LOG_INFO, "sizes %3i %3i -> %9lli score mb_count %i", info->v1_size, info->v4_size, (long long int)ret, mb_count);
220
+
221
+#ifdef CINEPAK_REPORT_SERR
222
+    *serr = 0;
223
+#endif
199 224
 
200
-    switch(mode) {
225
+    switch(info->mode) {
201 226
     case MODE_V1_ONLY:
202 227
         //one byte per MB
203 228
         ret += s->lambda * 8 * mb_count;
204 229
 
230
+// while calculating we assume all blocks are ENC_V1
205 231
         for(x = 0; x < mb_count; x++) {
206 232
             mb = &s->mb[x];
207 233
             ret += FF_LAMBDA_SCALE * mb->v1_error;
234
+#ifdef CINEPAK_REPORT_SERR
235
+            *serr += mb->v1_error;
236
+#endif
237
+// this function is never called for report in MODE_V1_ONLY
238
+//            if(!report)
208 239
             mb->best_encoding = ENC_V1;
209 240
         }
210 241
 
211 242
         break;
212 243
     case MODE_V1_V4:
213 244
         //9 or 33 bits per MB
214
-        for(x = 0; x < mb_count; x++) {
215
-            mb = &s->mb[x];
216
-            score1 = s->lambda * 9  + FF_LAMBDA_SCALE * mb->v1_error;
217
-            score2 = s->lambda * 33 + FF_LAMBDA_SCALE * mb->v4_error[v4];
218
-
219
-            if(score1 <= score2) {
245
+        if(report) {
246
+// no moves between the corresponding training sets are allowed
247
+            *training_set_v1_shrunk = *training_set_v4_shrunk = 0;
248
+            for(x = 0; x < mb_count; x++) {
249
+                int mberr;
250
+                mb = &s->mb[x];
251
+                if(mb->best_encoding == ENC_V1)
252
+                    score1 = s->lambda * 9  + FF_LAMBDA_SCALE * (mberr=mb->v1_error);
253
+                else
254
+                    score1 = s->lambda * 33 + FF_LAMBDA_SCALE * (mberr=mb->v4_error);
220 255
                 ret += score1;
221
-                mb->best_encoding = ENC_V1;
222
-            } else {
223
-                ret += score2;
224
-                mb->best_encoding = ENC_V4;
256
+#ifdef CINEPAK_REPORT_SERR
257
+                *serr += mberr;
258
+#endif
259
+            }
260
+        } else { // find best mode per block
261
+            for(x = 0; x < mb_count; x++) {
262
+                mb = &s->mb[x];
263
+                score1 = s->lambda * 9  + FF_LAMBDA_SCALE * mb->v1_error;
264
+                score2 = s->lambda * 33 + FF_LAMBDA_SCALE * mb->v4_error;
265
+
266
+                if(score1 <= score2) {
267
+                    ret += score1;
268
+#ifdef CINEPAK_REPORT_SERR
269
+                    *serr += mb->v1_error;
270
+#endif
271
+                    mb->best_encoding = ENC_V1;
272
+                } else {
273
+                    ret += score2;
274
+#ifdef CINEPAK_REPORT_SERR
275
+                    *serr += mb->v4_error;
276
+#endif
277
+                    mb->best_encoding = ENC_V4;
278
+                }
225 279
             }
226 280
         }
227 281
 
228 282
         break;
229 283
     case MODE_MC:
230 284
         //1, 10 or 34 bits per MB
231
-        for(x = 0; x < mb_count; x++) {
232
-            mb = &s->mb[x];
233
-            score1 = s->lambda * 1  + FF_LAMBDA_SCALE * mb->skip_error;
234
-            score2 = s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error;
235
-            score3 = s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error[v4];
236
-
237
-
238
-            if(score1 <= score2 && score1 <= score3) {
239
-                ret += score1;
240
-                mb->best_encoding = ENC_SKIP;
241
-            } else if(score2 <= score1 && score2 <= score3) {
242
-                ret += score2;
243
-                mb->best_encoding = ENC_V1;
244
-            } else {
245
-                ret += score3;
246
-                mb->best_encoding = ENC_V4;
285
+        if(report) {
286
+            int v1_shrunk = 0, v4_shrunk = 0;
287
+            for(x = 0; x < mb_count; x++) {
288
+                mb = &s->mb[x];
289
+// it is OK to move blocks to ENC_SKIP here
290
+// but not to any codebook encoding!
291
+                score1 = s->lambda * 1  + FF_LAMBDA_SCALE * mb->skip_error;
292
+                if(mb->best_encoding == ENC_SKIP) {
293
+                    ret += score1;
294
+#ifdef CINEPAK_REPORT_SERR
295
+                    *serr += mb->skip_error;
296
+#endif
297
+                } else if(mb->best_encoding == ENC_V1) {
298
+                    if((score2=s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error) >= score1) {
299
+                        mb->best_encoding = ENC_SKIP;
300
+                        ++v1_shrunk;
301
+                        ret += score1;
302
+#ifdef CINEPAK_REPORT_SERR
303
+                        *serr += mb->skip_error;
304
+#endif
305
+                    } else {
306
+                        ret += score2;
307
+#ifdef CINEPAK_REPORT_SERR
308
+                        *serr += mb->v1_error;
309
+#endif
310
+                    }
311
+                } else {
312
+                    if((score3=s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error) >= score1) {
313
+                        mb->best_encoding = ENC_SKIP;
314
+                        ++v4_shrunk;
315
+                        ret += score1;
316
+#ifdef CINEPAK_REPORT_SERR
317
+                        *serr += mb->skip_error;
318
+#endif
319
+                    } else {
320
+                        ret += score3;
321
+#ifdef CINEPAK_REPORT_SERR
322
+                        *serr += mb->v4_error;
323
+#endif
324
+                    }
325
+                }
326
+            }
327
+            *training_set_v1_shrunk = v1_shrunk;
328
+            *training_set_v4_shrunk = v4_shrunk;
329
+        } else { // find best mode per block
330
+            for(x = 0; x < mb_count; x++) {
331
+                mb = &s->mb[x];
332
+                score1 = s->lambda * 1  + FF_LAMBDA_SCALE * mb->skip_error;
333
+                score2 = s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error;
334
+                score3 = s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error;
335
+
336
+                if(score1 <= score2 && score1 <= score3) {
337
+                    ret += score1;
338
+#ifdef CINEPAK_REPORT_SERR
339
+                    *serr += mb->skip_error;
340
+#endif
341
+                    mb->best_encoding = ENC_SKIP;
342
+                } else if(score2 <= score3) {
343
+                    ret += score2;
344
+#ifdef CINEPAK_REPORT_SERR
345
+                    *serr += mb->v1_error;
346
+#endif
347
+                    mb->best_encoding = ENC_V1;
348
+                } else {
349
+                    ret += score3;
350
+#ifdef CINEPAK_REPORT_SERR
351
+                    *serr += mb->v4_error;
352
+#endif
353
+                    mb->best_encoding = ENC_V4;
354
+                }
247 355
             }
248 356
         }
249 357
 
... ...
@@ -262,13 +418,45 @@ static int write_chunk_header(unsigned char *buf, int chunk_type, int chunk_size
262 262
 
263 263
 static int encode_codebook(CinepakEncContext *s, int *codebook, int size, int chunk_type_yuv, int chunk_type_gray, unsigned char *buf)
264 264
 {
265
-    int x, y, ret, entry_size = s->pix_fmt == AV_PIX_FMT_YUV420P ? 6 : 4;
266
-
267
-    ret = write_chunk_header(buf, s->pix_fmt == AV_PIX_FMT_YUV420P ? chunk_type_yuv : chunk_type_gray, entry_size * size);
268
-
269
-    for(x = 0; x < size; x++)
270
-        for(y = 0; y < entry_size; y++)
271
-            buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);
265
+    int x, y, ret, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
266
+    int incremental_codebook_replacement_mode = 0; // hardcoded here,
267
+                // the compiler should notice that this is a constant -- rl
268
+
269
+    ret = write_chunk_header(buf,
270
+          s->pix_fmt == AV_PIX_FMT_RGB24 ?
271
+           chunk_type_yuv+(incremental_codebook_replacement_mode?1:0) :
272
+           chunk_type_gray+(incremental_codebook_replacement_mode?1:0),
273
+          entry_size * size
274
+           + (incremental_codebook_replacement_mode?(size+31)/32*4:0) );
275
+
276
+// we do codebook encoding according to the "intra" mode
277
+// but we keep the "dead" code for reference in case we will want
278
+// to use incremental codebook updates (which actually would give us
279
+// "kind of" motion compensation, especially in 1 strip/frame case) -- rl
280
+// (of course, the code will be not useful as-is)
281
+    if(incremental_codebook_replacement_mode) {
282
+        int flags = 0;
283
+        int flagsind;
284
+        for(x = 0; x < size; x++) {
285
+            if(flags == 0) {
286
+                flagsind = ret;
287
+                ret += 4;
288
+                flags = 0x80000000;
289
+            } else
290
+                flags = ((flags>>1) | 0x80000000);
291
+            for(y = 0; y < entry_size; y++)
292
+                buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);
293
+            if((flags&0xffffffff) == 0xffffffff) {
294
+                AV_WB32(&buf[flagsind], flags);
295
+                flags = 0;
296
+            }
297
+        }
298
+        if(flags)
299
+            AV_WB32(&buf[flagsind], flags);
300
+    } else
301
+        for(x = 0; x < size; x++)
302
+            for(y = 0; y < entry_size; y++)
303
+                buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);
272 304
 
273 305
     return ret;
274 306
 }
... ...
@@ -279,7 +467,7 @@ static void get_sub_picture(CinepakEncContext *s, int x, int y, AVPicture *in, A
279 279
     out->data[0] = in->data[0] + x + y * in->linesize[0];
280 280
     out->linesize[0] = in->linesize[0];
281 281
 
282
-    if(s->pix_fmt == AV_PIX_FMT_YUV420P) {
282
+    if(s->pix_fmt == AV_PIX_FMT_RGB24) {
283 283
         out->data[1] = in->data[1] + (x >> 1) + (y >> 1) * in->linesize[1];
284 284
         out->linesize[1] = in->linesize[1];
285 285
 
... ...
@@ -289,47 +477,47 @@ static void get_sub_picture(CinepakEncContext *s, int x, int y, AVPicture *in, A
289 289
 }
290 290
 
291 291
 //decodes the V1 vector in mb into the 4x4 MB pointed to by sub_pict
292
-static void decode_v1_vector(CinepakEncContext *s, AVPicture *sub_pict, mb_info *mb, strip_info *info)
292
+static void decode_v1_vector(CinepakEncContext *s, AVPicture *sub_pict, int v1_vector, strip_info *info)
293 293
 {
294
-    int entry_size = s->pix_fmt == AV_PIX_FMT_YUV420P ? 6 : 4;
294
+    int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
295 295
 
296 296
     sub_pict->data[0][0] =
297 297
             sub_pict->data[0][1] =
298 298
             sub_pict->data[0][    sub_pict->linesize[0]] =
299
-            sub_pict->data[0][1+  sub_pict->linesize[0]] = info->v1_codebook[mb->v1_vector*entry_size];
299
+            sub_pict->data[0][1+  sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size];
300 300
 
301 301
     sub_pict->data[0][2] =
302 302
             sub_pict->data[0][3] =
303 303
             sub_pict->data[0][2+  sub_pict->linesize[0]] =
304
-            sub_pict->data[0][3+  sub_pict->linesize[0]] = info->v1_codebook[mb->v1_vector*entry_size+1];
304
+            sub_pict->data[0][3+  sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size+1];
305 305
 
306 306
     sub_pict->data[0][2*sub_pict->linesize[0]] =
307 307
             sub_pict->data[0][1+2*sub_pict->linesize[0]] =
308 308
             sub_pict->data[0][  3*sub_pict->linesize[0]] =
309
-            sub_pict->data[0][1+3*sub_pict->linesize[0]] = info->v1_codebook[mb->v1_vector*entry_size+2];
309
+            sub_pict->data[0][1+3*sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size+2];
310 310
 
311 311
     sub_pict->data[0][2+2*sub_pict->linesize[0]] =
312 312
             sub_pict->data[0][3+2*sub_pict->linesize[0]] =
313 313
             sub_pict->data[0][2+3*sub_pict->linesize[0]] =
314
-            sub_pict->data[0][3+3*sub_pict->linesize[0]] = info->v1_codebook[mb->v1_vector*entry_size+3];
314
+            sub_pict->data[0][3+3*sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size+3];
315 315
 
316
-    if(s->pix_fmt == AV_PIX_FMT_YUV420P) {
316
+    if(s->pix_fmt == AV_PIX_FMT_RGB24) {
317 317
         sub_pict->data[1][0] =
318 318
             sub_pict->data[1][1] =
319 319
             sub_pict->data[1][    sub_pict->linesize[1]] =
320
-            sub_pict->data[1][1+  sub_pict->linesize[1]] = info->v1_codebook[mb->v1_vector*entry_size+4];
320
+            sub_pict->data[1][1+  sub_pict->linesize[1]] = info->v1_codebook[v1_vector*entry_size+4];
321 321
 
322 322
         sub_pict->data[2][0] =
323 323
             sub_pict->data[2][1] =
324 324
             sub_pict->data[2][    sub_pict->linesize[2]] =
325
-            sub_pict->data[2][1+  sub_pict->linesize[2]] = info->v1_codebook[mb->v1_vector*entry_size+5];
325
+            sub_pict->data[2][1+  sub_pict->linesize[2]] = info->v1_codebook[v1_vector*entry_size+5];
326 326
     }
327 327
 }
328 328
 
329 329
 //decodes the V4 vectors in mb into the 4x4 MB pointed to by sub_pict
330 330
 static void decode_v4_vector(CinepakEncContext *s, AVPicture *sub_pict, int *v4_vector, strip_info *info)
331 331
 {
332
-    int i, x, y, entry_size = s->pix_fmt == AV_PIX_FMT_YUV420P ? 6 : 4;
332
+    int i, x, y, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
333 333
 
334 334
     for(i = y = 0; y < 4; y += 2) {
335 335
         for(x = 0; x < 4; x += 2, i++) {
... ...
@@ -338,7 +526,7 @@ static void decode_v4_vector(CinepakEncContext *s, AVPicture *sub_pict, int *v4_
338 338
             sub_pict->data[0][x   + (y+1)*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+2];
339 339
             sub_pict->data[0][x+1 + (y+1)*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+3];
340 340
 
341
-            if(s->pix_fmt == AV_PIX_FMT_YUV420P) {
341
+            if(s->pix_fmt == AV_PIX_FMT_RGB24) {
342 342
                 sub_pict->data[1][(x>>1) + (y>>1)*sub_pict->linesize[1]] = info->v4_codebook[v4_vector[i]*entry_size+4];
343 343
                 sub_pict->data[2][(x>>1) + (y>>1)*sub_pict->linesize[2]] = info->v4_codebook[v4_vector[i]*entry_size+5];
344 344
             }
... ...
@@ -346,39 +534,62 @@ static void decode_v4_vector(CinepakEncContext *s, AVPicture *sub_pict, int *v4_
346 346
     }
347 347
 }
348 348
 
349
-static int encode_mode(CinepakEncContext *s, CinepakMode mode, int h, int v1_size, int v4_size, int v4, AVPicture *scratch_pict, strip_info *info, unsigned char *buf)
349
+static void copy_mb(CinepakEncContext *s, AVPicture *a, AVPicture *b)
350
+{
351
+    int y, p;
352
+
353
+    for(y = 0; y < MB_SIZE; y++) {
354
+        memcpy(a->data[0]+y*a->linesize[0], b->data[0]+y*b->linesize[0],
355
+               MB_SIZE);
356
+    }
357
+
358
+    if(s->pix_fmt == AV_PIX_FMT_RGB24) {
359
+        for(p = 1; p <= 2; p++) {
360
+            for(y = 0; y < MB_SIZE/2; y++) {
361
+                memcpy(a->data[p] + y*a->linesize[p],
362
+                       b->data[p] + y*b->linesize[p],
363
+                       MB_SIZE/2);
364
+            }
365
+        }
366
+    }
367
+}
368
+
369
+static int encode_mode(CinepakEncContext *s, int h, AVPicture *scratch_pict, AVPicture *last_pict, strip_info *info, unsigned char *buf)
350 370
 {
351 371
     int x, y, z, flags, bits, temp_size, header_ofs, ret = 0, mb_count = s->w * h / MB_AREA;
352 372
     int needs_extra_bit, should_write_temp;
353 373
     unsigned char temp[64]; //32/2 = 16 V4 blocks at 4 B each -> 64 B
354 374
     mb_info *mb;
355
-    AVPicture sub_scratch;
375
+    AVPicture sub_scratch, sub_last;
356 376
 
357 377
     //encode codebooks
358
-    if(v1_size)
359
-        ret += encode_codebook(s, info->v1_codebook, v1_size, 0x22, 0x26, buf + ret);
378
+////// MacOS vintage decoder compatibility dictates the presence of
379
+////// the codebook chunk even when the codebook is empty - pretty dumb...
380
+////// and also the certain order of the codebook chunks -- rl
381
+//    if(info->v4_size)
382
+        ret += encode_codebook(s, info->v4_codebook, info->v4_size, 0x20, 0x24, buf + ret);
360 383
 
361
-    if(v4_size)
362
-        ret += encode_codebook(s, info->v4_codebook, v4_size, 0x20, 0x24, buf + ret);
384
+//    if(info->v1_size)
385
+        ret += encode_codebook(s, info->v1_codebook, info->v1_size, 0x22, 0x26, buf + ret);
363 386
 
364 387
     //update scratch picture
365 388
     for(z = y = 0; y < h; y += MB_SIZE) {
366 389
         for(x = 0; x < s->w; x += MB_SIZE, z++) {
367 390
             mb = &s->mb[z];
368 391
 
369
-            if(mode == MODE_MC && mb->best_encoding == ENC_SKIP)
370
-                continue;
371
-
372 392
             get_sub_picture(s, x, y, scratch_pict, &sub_scratch);
373 393
 
374
-            if(mode == MODE_V1_ONLY || mb->best_encoding == ENC_V1)
375
-                decode_v1_vector(s, &sub_scratch, mb, info);
376
-            else if(mode != MODE_V1_ONLY && mb->best_encoding == ENC_V4)
377
-                decode_v4_vector(s, &sub_scratch, mb->v4_vector[v4], info);
394
+            if(info->mode == MODE_MC && mb->best_encoding == ENC_SKIP) {
395
+                get_sub_picture(s, x, y, last_pict, &sub_last);
396
+                copy_mb(s, &sub_scratch, &sub_last);
397
+            } else if(info->mode == MODE_V1_ONLY || mb->best_encoding == ENC_V1)
398
+                decode_v1_vector(s, &sub_scratch, mb->v1_vector, info);
399
+            else
400
+                decode_v4_vector(s, &sub_scratch, mb->v4_vector, info);
378 401
         }
379 402
     }
380 403
 
381
-    switch(mode) {
404
+    switch(info->mode) {
382 405
     case MODE_V1_ONLY:
383 406
         //av_log(s->avctx, AV_LOG_INFO, "mb_count = %i\n", mb_count);
384 407
         ret += write_chunk_header(buf + ret, 0x32, mb_count);
... ...
@@ -408,7 +619,7 @@ static int encode_mode(CinepakEncContext *s, CinepakMode mode, int h, int v1_siz
408 408
                     buf[ret++] = mb->v1_vector;
409 409
                 else
410 410
                     for(z = 0; z < 4; z++)
411
-                        buf[ret++] = mb->v4_vector[v4][z];
411
+                        buf[ret++] = mb->v4_vector[z];
412 412
             }
413 413
         }
414 414
 
... ...
@@ -456,7 +667,7 @@ static int encode_mode(CinepakEncContext *s, CinepakMode mode, int h, int v1_siz
456 456
                 temp[temp_size++] = mb->v1_vector;
457 457
             else if(mb->best_encoding == ENC_V4)
458 458
                 for(z = 0; z < 4; z++)
459
-                    temp[temp_size++] = mb->v4_vector[v4][z];
459
+                    temp[temp_size++] = mb->v4_vector[z];
460 460
 
461 461
             if(should_write_temp) {
462 462
                 memcpy(&buf[ret], temp, temp_size);
... ...
@@ -492,7 +703,7 @@ static int compute_mb_distortion(CinepakEncContext *s, AVPicture *a, AVPicture *
492 492
         }
493 493
     }
494 494
 
495
-    if(s->pix_fmt == AV_PIX_FMT_YUV420P) {
495
+    if(s->pix_fmt == AV_PIX_FMT_RGB24) {
496 496
         for(p = 1; p <= 2; p++) {
497 497
             for(y = 0; y < MB_SIZE/2; y++) {
498 498
                 for(x = 0; x < MB_SIZE/2; x++) {
... ...
@@ -506,19 +717,30 @@ static int compute_mb_distortion(CinepakEncContext *s, AVPicture *a, AVPicture *
506 506
     return ret;
507 507
 }
508 508
 
509
-static int quantize(CinepakEncContext *s, int h, AVPicture *pict, int v1mode, int size, int v4, strip_info *info)
509
+// return the possibly adjusted size of the codebook
510
+#define CERTAIN(x) ((x)!=ENC_UNCERTAIN)
511
+static int quantize(CinepakEncContext *s, int h, AVPicture *pict,
512
+                    int v1mode, strip_info *info,
513
+                    mb_encoding encoding)
510 514
 {
511
-    int x, y, i, j, k, x2, y2, x3, y3, plane, shift;
512
-    int entry_size = s->pix_fmt == AV_PIX_FMT_YUV420P ? 6 : 4;
515
+    int x, y, i, j, k, x2, y2, x3, y3, plane, shift, mbn;
516
+    int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
513 517
     int *codebook = v1mode ? info->v1_codebook : info->v4_codebook;
518
+    int size = v1mode ? info->v1_size : info->v4_size;
514 519
     int64_t total_error = 0;
515 520
     uint8_t vq_pict_buf[(MB_AREA*3)/2];
516 521
     AVPicture sub_pict, vq_pict;
517 522
 
518
-    for(i = y = 0; y < h; y += MB_SIZE) {
519
-        for(x = 0; x < s->w; x += MB_SIZE, i += v1mode ? 1 : 4) {
520
-            int *base = s->codebook_input + i*entry_size;
523
+    for(mbn = i = y = 0; y < h; y += MB_SIZE) {
524
+        for(x = 0; x < s->w; x += MB_SIZE, ++mbn) {
525
+            int *base;
521 526
 
527
+            if(CERTAIN(encoding)) {
528
+// use for the training only the blocks known to be to be encoded [sic:-]
529
+               if(s->mb[mbn].best_encoding != encoding) continue;
530
+            }
531
+
532
+            base = s->codebook_input + i*entry_size;
522 533
             if(v1mode) {
523 534
                 //subsample
524 535
                 for(j = y2 = 0; y2 < entry_size; y2 += 2) {
... ...
@@ -553,8 +775,19 @@ static int quantize(CinepakEncContext *s, int h, AVPicture *pict, int v1mode, in
553 553
                     }
554 554
                 }
555 555
             }
556
+            i += v1mode ? 1 : 4;
556 557
         }
557 558
     }
559
+//    if(i < mbn*(v1mode ? 1 : 4)) {
560
+//        av_log(s->avctx, AV_LOG_INFO, "reducing training set for %s from %i to %i (encoding %i)\n", v1mode?"v1":"v4", mbn*(v1mode ? 1 : 4), i, encoding);
561
+//    }
562
+
563
+    if(i == 0) // empty training set, nothing to do
564
+        return 0;
565
+    if(i < size) {
566
+        //av_log(s->avctx, (CERTAIN(encoding) ? AV_LOG_ERROR : AV_LOG_INFO), "WOULD WASTE: %s cbsize %i bigger than training set size %i (encoding %i)\n", v1mode?"v1":"v4", size, i, encoding);
567
+        size = i;
568
+    }
558 569
 
559 570
     ff_init_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
560 571
     ff_do_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
... ...
@@ -568,8 +801,11 @@ static int quantize(CinepakEncContext *s, int h, AVPicture *pict, int v1mode, in
568 568
 
569 569
     //copy indices
570 570
     for(i = j = y = 0; y < h; y += MB_SIZE) {
571
-        for(x = 0; x < s->w; x += MB_SIZE, j++, i += v1mode ? 1 : 4) {
571
+        for(x = 0; x < s->w; x += MB_SIZE, j++) {
572 572
             mb_info *mb = &s->mb[j];
573
+// skip uninteresting blocks if we know their preferred encoding
574
+            if(CERTAIN(encoding) && mb->best_encoding != encoding)
575
+                continue;
573 576
 
574 577
             //point sub_pict to current MB
575 578
             get_sub_picture(s, x, y, pict, &sub_pict);
... ...
@@ -578,26 +814,29 @@ static int quantize(CinepakEncContext *s, int h, AVPicture *pict, int v1mode, in
578 578
                 mb->v1_vector = s->codebook_closest[i];
579 579
 
580 580
                 //fill in vq_pict with V1 data
581
-                decode_v1_vector(s, &vq_pict, mb, info);
581
+                decode_v1_vector(s, &vq_pict, mb->v1_vector, info);
582 582
 
583 583
                 mb->v1_error = compute_mb_distortion(s, &sub_pict, &vq_pict);
584 584
                 total_error += mb->v1_error;
585 585
             } else {
586 586
                 for(k = 0; k < 4; k++)
587
-                    mb->v4_vector[v4][k] = s->codebook_closest[i+k];
587
+                    mb->v4_vector[k] = s->codebook_closest[i+k];
588 588
 
589 589
                 //fill in vq_pict with V4 data
590
-                decode_v4_vector(s, &vq_pict, mb->v4_vector[v4], info);
590
+                decode_v4_vector(s, &vq_pict, mb->v4_vector, info);
591 591
 
592
-                mb->v4_error[v4] = compute_mb_distortion(s, &sub_pict, &vq_pict);
593
-                total_error += mb->v4_error[v4];
592
+                mb->v4_error = compute_mb_distortion(s, &sub_pict, &vq_pict);
593
+                total_error += mb->v4_error;
594 594
             }
595
+            i += v1mode ? 1 : 4;
595 596
         }
596 597
     }
598
+// check that we did it right in the beginning of the function
599
+    av_assert0(i >= size); // training set is no smaller than the codebook
597 600
 
598
-    //av_log(s->avctx, AV_LOG_INFO, "mode %i size %i i %i error %li\n", v1mode, size, i, total_error);
601
+    //av_log(s->avctx, AV_LOG_INFO, "isv1 %i size= %i i= %i error %lli\n", v1mode, size, i, (long long int)total_error);
599 602
 
600
-    return 0;
603
+    return size;
601 604
 }
602 605
 
603 606
 static void calculate_skip_errors(CinepakEncContext *s, int h, AVPicture *last_pict, AVPicture *pict, strip_info *info)
... ...
@@ -617,59 +856,151 @@ static void calculate_skip_errors(CinepakEncContext *s, int h, AVPicture *last_p
617 617
 
618 618
 static void write_strip_header(CinepakEncContext *s, int y, int h, int keyframe, unsigned char *buf, int strip_size)
619 619
 {
620
-    buf[0] = keyframe ? 0x11: 0x10;
620
+// actually we are exclusively using intra strip coding (how much can we win
621
+// otherwise? how to choose which part of a codebook to update?),
622
+// keyframes are different only because we disallow ENC_SKIP on them -- rl
623
+// (besides, the logic here used to be inverted: )
624
+//    buf[0] = keyframe ? 0x11: 0x10;
625
+    buf[0] = keyframe ? 0x10: 0x11;
621 626
     AV_WB24(&buf[1], strip_size + STRIP_HEADER_SIZE);
622
-    AV_WB16(&buf[4], y);
627
+//    AV_WB16(&buf[4], y); /* using absolute y values works -- rl */
628
+    AV_WB16(&buf[4], 0); /* using relative values works as well -- rl */
623 629
     AV_WB16(&buf[6], 0);
624
-    AV_WB16(&buf[8], h);
630
+//    AV_WB16(&buf[8], y+h); /* using absolute y values works -- rl */
631
+    AV_WB16(&buf[8], h); /* using relative values works as well -- rl */
625 632
     AV_WB16(&buf[10], s->w);
633
+    //av_log(s->avctx, AV_LOG_INFO, "write_strip_header() %x keyframe=%d\n", buf[0], keyframe);
626 634
 }
627 635
 
628
-static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe, AVPicture *last_pict, AVPicture *pict, AVPicture *scratch_pict, unsigned char *buf, int64_t *best_score)
636
+static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe, AVPicture *last_pict, AVPicture *pict, AVPicture *scratch_pict, unsigned char *buf, int64_t *best_score
637
+#ifdef CINEPAK_REPORT_SERR
638
+, int64_t *best_serr
639
+#endif
640
+)
629 641
 {
630 642
     int64_t score = 0;
631
-    int best_size = 0, v1_size, v4_size, v4, mb_count = s->w * h / MB_AREA;
643
+#ifdef CINEPAK_REPORT_SERR
644
+    int64_t serr;
645
+#endif
646
+    int best_size = 0;
632 647
     strip_info info;
633
-    CinepakMode best_mode;
634
-    int v4_codebooks[CODEBOOK_NUM][CODEBOOK_MAX*VECTOR_MAX];
648
+// for codebook optimization:
649
+    int v1enough, v1_size, v4enough, v4_size;
650
+    int new_v1_size, new_v4_size;
651
+    int v1shrunk, v4shrunk;
635 652
 
636 653
     if(!keyframe)
637 654
         calculate_skip_errors(s, h, last_pict, pict, &info);
638 655
 
639
-    //precompute V4 codebooks
640
-    for(v4_size = 1, v4 = 0; v4_size <= 256; v4_size <<= 2, v4++) {
641
-        info.v4_codebook = v4_codebooks[v4];
642
-        quantize(s, h, pict, 0, v4_size, v4, &info);
643
-    }
644
-
645
-    //try all powers of 4 for the size of the codebooks
646
-    //constraint the v4 codebook to be no bigger than the v1 codebook
647
-    for(v1_size = 1; v1_size <= 256; v1_size <<= 2) {
648
-        //compute V1 codebook
649
-        quantize(s, h, pict, 1, v1_size, -1, &info);
650
-
651
-        for(v4_size = 0, v4 = -1; v4_size <= v1_size; v4_size = v4_size ? v4_size << 2 : v1_size >= 4 ? v1_size >> 2 : 1, v4++) {
656
+    //try some powers of 4 for the size of the codebooks
657
+    //constrain the v4 codebook to be no bigger than the v1 one,
658
+    //(and no less than v1_size/4)
659
+    //thus making v1 preferable and possibly losing small details? should be ok
660
+#define SMALLEST_CODEBOOK 1
661
+    for(v1enough = 0, v1_size = SMALLEST_CODEBOOK; v1_size <= CODEBOOK_MAX && !v1enough; v1_size <<= 2) {
662
+        for(v4enough = 0, v4_size = 0; v4_size <= v1_size && !v4enough; v4_size = v4_size ? v4_size << 2 : v1_size >= SMALLEST_CODEBOOK << 2 ? v1_size >> 2 : SMALLEST_CODEBOOK) {
652 663
             //try all modes
653 664
             for(CinepakMode mode = 0; mode < MODE_COUNT; mode++) {
654
-                //don't allow MODE_MC in inter frames
665
+                //don't allow MODE_MC in intra frames
655 666
                 if(keyframe && mode == MODE_MC)
656 667
                     continue;
657 668
 
658
-                //only allow V1-only mode if v4 codebook is empty
659
-                if(!v4_size && mode != MODE_V1_ONLY)
660
-                    continue;
669
+                if(mode == MODE_V1_ONLY) {
670
+                    info.v1_size = v1_size;
671
+// the size may shrink even before optimizations if the input is short:
672
+                    info.v1_size = quantize(s, h, pict, 1, &info, ENC_UNCERTAIN);
673
+                    if(info.v1_size < v1_size)
674
+// too few eligible blocks, no sense in trying bigger sizes
675
+                        v1enough = 1;
676
+
677
+                    info.v4_size = 0;
678
+                } else { // mode != MODE_V1_ONLY
679
+                    // if v4 codebook is empty then only allow V1-only mode
680
+                    if(!v4_size)
681
+                        continue;
682
+
683
+                    if(mode == MODE_V1_V4) {
684
+                        info.v4_size = v4_size;
685
+                        info.v4_size = quantize(s, h, pict, 0, &info, ENC_UNCERTAIN);
686
+                        if(info.v4_size < v4_size)
687
+// too few eligible blocks, no sense in trying bigger sizes
688
+                            v4enough = 1;
689
+                    }
690
+                }
661 691
 
662
-                info.v4_codebook = v4 >= 0 ? v4_codebooks[v4] : NULL;
663
-                score = calculate_mode_score(s, mode, h, v1_size, v4_size, v4, &info);
692
+                info.mode = mode;
693
+// choose the best encoding per block, based on current experience
694
+                score = calculate_mode_score(s, h, &info, 0,
695
+                                             &v1shrunk, &v4shrunk
696
+#ifdef CINEPAK_REPORT_SERR
697
+, &serr
698
+#endif
699
+);
700
+
701
+                if(mode != MODE_V1_ONLY){
702
+// recompute the codebooks, omitting the extra blocks
703
+// we assume we _may_ come here with more blocks to encode than before
704
+                    info.v1_size = v1_size;
705
+                    new_v1_size = quantize(s, h, pict, 1, &info, ENC_V1);
706
+                    if(new_v1_size < info.v1_size){
707
+                        //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size);
708
+                        info.v1_size = new_v1_size;
709
+                    }
710
+// we assume we _may_ come here with more blocks to encode than before
711
+                    info.v4_size = v4_size;
712
+                    new_v4_size = quantize(s, h, pict, 0, &info, ENC_V4);
713
+                    if(new_v4_size < info.v4_size) {
714
+                        //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries at first iteration\n", mode, v1_size, v4_size, new_v4_size);
715
+                        info.v4_size = new_v4_size;
716
+                    }
717
+// calculate the resulting score
718
+// (do not move blocks to codebook encodings now, as some blocks may have
719
+// got bigger errors despite a smaller training set - but we do not
720
+// ever grow the training sets back)
721
+                    for(;;) {
722
+                        score = calculate_mode_score(s, h, &info, 1,
723
+                                                     &v1shrunk, &v4shrunk
724
+#ifdef CINEPAK_REPORT_SERR
725
+, &serr
726
+#endif
727
+);
728
+// do we have a reason to reiterate?
729
+                        if(!v1shrunk && !v4shrunk) break;
730
+// recompute the codebooks, omitting the extra blocks
731
+                        if(v1shrunk) {
732
+                            info.v1_size = v1_size;
733
+                            new_v1_size = quantize(s, h, pict, 1, &info, ENC_V1);
734
+                            if(new_v1_size < info.v1_size){
735
+                                //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size);
736
+                                info.v1_size = new_v1_size;
737
+                            }
738
+                        }
739
+                        if(v4shrunk) {
740
+                            info.v4_size = v4_size;
741
+                            new_v4_size = quantize(s, h, pict, 0, &info, ENC_V4);
742
+                            if(new_v4_size < info.v4_size) {
743
+                                //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries\n", mode, v1_size, v4_size, new_v4_size);
744
+                                info.v4_size = new_v4_size;
745
+                            }
746
+                        }
747
+                    }
748
+                }
664 749
 
665
-                //av_log(s->avctx, AV_LOG_INFO, "%3i %3i score = %li\n", v1_size, v4_size, score);
750
+                //av_log(s->avctx, AV_LOG_INFO, "%3i %3i score = %lli\n", v1_size, v4_size, (long long int)score);
666 751
 
667 752
                 if(best_size == 0 || score < *best_score) {
753
+
668 754
                     *best_score = score;
669
-                    best_size = encode_mode(s, mode, h, v1_size, v4_size, v4, scratch_pict, &info, s->strip_buf + STRIP_HEADER_SIZE);
670
-                    best_mode = mode;
755
+#ifdef CINEPAK_REPORT_SERR
756
+                    *best_serr = serr;
757
+#endif
758
+                    best_size = encode_mode(s, h, scratch_pict, last_pict, &info, s->strip_buf + STRIP_HEADER_SIZE);
671 759
 
672
-                    av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18li %i B\n", mode, v1_size, v4_size, score, best_size);
760
+                    //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18lli %i B", mode, info.v1_size, info.v4_size, (long long int)score, best_size);
761
+                    //av_log(s->avctx, AV_LOG_INFO, "\n");
762
+#ifdef CINEPAK_REPORT_SERR
763
+                    av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18lli %i B\n", mode, v1_size, v4_size, (long long int)serr, best_size);
764
+#endif
673 765
 
674 766
 #ifdef CINEPAKENC_DEBUG
675 767
                     //save MB encoding choices
... ...
@@ -678,6 +1009,7 @@ static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe, AVPicture
678 678
 
679 679
                     //memcpy(strip_temp + STRIP_HEADER_SIZE, strip_temp, best_size);
680 680
                     write_strip_header(s, y, h, keyframe, s->strip_buf, best_size);
681
+
681 682
                 }
682 683
             }
683 684
         }
... ...
@@ -685,11 +1017,11 @@ static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe, AVPicture
685 685
 
686 686
 #ifdef CINEPAKENC_DEBUG
687 687
     //gather stats. this will only work properly if MAX_STRIPS == 1
688
-    if(best_mode == MODE_V1_ONLY) {
688
+    if(best_info.mode == MODE_V1_ONLY) {
689 689
         s->num_v1_mode++;
690 690
         s->num_v1_encs += s->w*h/MB_AREA;
691 691
     } else {
692
-        if(best_mode == MODE_V1_V4)
692
+        if(best_info.mode == MODE_V1_V4)
693 693
             s->num_v4_mode++;
694 694
         else
695 695
             s->num_mc_mode++;
... ...
@@ -708,13 +1040,12 @@ static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe, AVPicture
708 708
     best_size += STRIP_HEADER_SIZE;
709 709
     memcpy(buf, s->strip_buf, best_size);
710 710
 
711
-
712 711
     return best_size;
713 712
 }
714 713
 
715
-static int write_cvid_header(CinepakEncContext *s, unsigned char *buf, int num_strips, int data_size)
714
+static int write_cvid_header(CinepakEncContext *s, unsigned char *buf, int num_strips, int data_size, int isakeyframe)
716 715
 {
717
-    buf[0] = 0;
716
+    buf[0] = isakeyframe ? 0 : 1;
718 717
     AV_WB24(&buf[1], data_size + CVID_HEADER_SIZE);
719 718
     AV_WB16(&buf[4], s->w);
720 719
     AV_WB16(&buf[6], s->h);
... ...
@@ -723,67 +1054,175 @@ static int write_cvid_header(CinepakEncContext *s, unsigned char *buf, int num_s
723 723
     return CVID_HEADER_SIZE;
724 724
 }
725 725
 
726
-static int rd_frame(CinepakEncContext *s, AVFrame *frame, unsigned char *buf, int buf_size)
726
+static int rd_frame(CinepakEncContext *s, AVFrame *frame, int isakeyframe, unsigned char *buf, int buf_size)
727 727
 {
728
-    int num_strips, strip, h, i, y, size, temp_size, best_size;
728
+    int num_strips, strip, i, y, nexty, size, temp_size, best_size;
729 729
     AVPicture last_pict, pict, scratch_pict;
730 730
     int64_t best_score = 0, score, score_temp;
731
+#ifdef CINEPAK_REPORT_SERR
732
+    int64_t best_serr = 0, serr, serr_temp;
733
+#endif
731 734
 
732
-    //TODO: support encoding zero strips (meaning skip the whole frame)
733
-    for(num_strips = MIN_STRIPS; num_strips <= MAX_STRIPS && num_strips <= s->h / MB_SIZE; num_strips++) {
735
+    int best_nstrips;
736
+
737
+    if(s->pix_fmt == AV_PIX_FMT_RGB24) {
738
+        int x;
739
+// build a copy of the given frame in the correct colorspace
740
+        for(y = 0; y < s->h; y += 2) {
741
+            for(x = 0; x < s->w; x += 2) {
742
+                uint8_t *ir[2]; int32_t r, g, b, rr, gg, bb;
743
+                ir[0] = ((AVPicture*)frame)->data[0] + x*3 + y*((AVPicture*)frame)->linesize[0];
744
+                ir[1] = ir[0] + ((AVPicture*)frame)->linesize[0];
745
+                get_sub_picture(s, x, y, (AVPicture*)s->input_frame, &scratch_pict);
746
+                r = g = b = 0;
747
+                for(i=0; i<4; ++i) {
748
+                    int i1, i2;
749
+                    i1 = (i&1); i2 = (i>=2);
750
+                    rr = ir[i2][i1*3+0];
751
+                    gg = ir[i2][i1*3+1];
752
+                    bb = ir[i2][i1*3+2];
753
+                    r += rr; g += gg; b += bb;
754
+// using fixed point arithmetic for portable repeatability, scaling by 2^23
755
+// "Y"
756
+//                    rr = 0.2857*rr + 0.5714*gg + 0.1429*bb;
757
+                    rr = (2396625*rr + 4793251*gg + 1198732*bb) >> 23;
758
+                    if(      rr <   0) rr =   0;
759
+                    else if (rr > 255) rr = 255;
760
+                    scratch_pict.data[0][i1 + i2*scratch_pict.linesize[0]] = rr;
761
+                }
762
+// let us scale down as late as possible
763
+//                r /= 4; g /= 4; b /= 4;
764
+// "U"
765
+//                rr = -0.1429*r - 0.2857*g + 0.4286*b;
766
+                rr = (-299683*r - 599156*g + 898839*b) >> 23;
767
+                if(      rr < -128) rr = -128;
768
+                else if (rr >  127) rr =  127;
769
+                scratch_pict.data[1][0] = rr + 128; // quantize needs unsigned
770
+// "V"
771
+//                rr = 0.3571*r - 0.2857*g - 0.0714*b;
772
+                rr = (748893*r - 599156*g - 149737*b) >> 23;
773
+                if(      rr < -128) rr = -128;
774
+                else if (rr >  127) rr =  127;
775
+                scratch_pict.data[2][0] = rr + 128; // quantize needs unsigned
776
+            }
777
+        }
778
+    }
779
+
780
+    //would be nice but quite certainly incompatible with vintage players:
781
+    // support encoding zero strips (meaning skip the whole frame)
782
+    for(num_strips = s->min_strips; num_strips <= s->max_strips && num_strips <= s->h / MB_SIZE; num_strips++) {
734 783
         score = 0;
735 784
         size = 0;
736
-        h = s->h / num_strips;
737
-        //make h into next multiple of 4
738
-        h += 4 - (h & 3);
785
+#ifdef CINEPAK_REPORT_SERR
786
+        serr = 0;
787
+#endif
788
+
789
+        for(y = 0, strip = 1; y < s->h; strip++, y = nexty) {
790
+            int strip_height;
739 791
 
740
-        for(strip = 0; strip < num_strips; strip++) {
741
-            y = strip*h;
792
+            nexty = strip * s->h / num_strips; // <= s->h
793
+            //make nexty the next multiple of 4 if not already there
794
+            if(nexty & 3)
795
+                nexty += 4 - (nexty & 3);
742 796
 
743
-            get_sub_picture(s, 0, y, (AVPicture*)frame,            &pict);
744
-            get_sub_picture(s, 0, y, (AVPicture*)&s->last_frame,    &last_pict);
745
-            get_sub_picture(s, 0, y, (AVPicture*)&s->scratch_frame, &scratch_pict);
797
+            strip_height = nexty - y;
798
+            if(strip_height <= 0) { // can this ever happen?
799
+                av_log(s->avctx, AV_LOG_INFO, "skipping zero height strip %i of %i\n", strip, num_strips);
800
+                continue;
801
+            }
802
+
803
+            if(s->pix_fmt == AV_PIX_FMT_RGB24)
804
+                get_sub_picture(s, 0, y, (AVPicture*)s->input_frame,    &pict);
805
+            else
806
+                get_sub_picture(s, 0, y, (AVPicture*)frame,              &pict);
807
+            get_sub_picture(s, 0, y, (AVPicture*)s->last_frame,    &last_pict);
808
+            get_sub_picture(s, 0, y, (AVPicture*)s->scratch_frame, &scratch_pict);
746 809
 
747
-            if((temp_size = rd_strip(s, y, FFMIN(h, s->h - y), frame->key_frame, &last_pict, &pict, &scratch_pict, s->frame_buf + CVID_HEADER_SIZE, &score_temp)) < 0)
810
+            if((temp_size = rd_strip(s, y, strip_height, isakeyframe, &last_pict, &pict, &scratch_pict, s->frame_buf + size + CVID_HEADER_SIZE, &score_temp
811
+#ifdef CINEPAK_REPORT_SERR
812
+, &serr_temp
813
+#endif
814
+)) < 0)
748 815
                 return temp_size;
749 816
 
750 817
             score += score_temp;
818
+#ifdef CINEPAK_REPORT_SERR
819
+            serr += serr_temp;
820
+#endif
751 821
             size += temp_size;
822
+            //av_log(s->avctx, AV_LOG_INFO, "strip %d, isakeyframe=%d", strip, isakeyframe);
823
+            //av_log(s->avctx, AV_LOG_INFO, "\n");
752 824
         }
753 825
 
754 826
         if(best_score == 0 || score < best_score) {
755 827
             best_score = score;
756
-            best_size = size + write_cvid_header(s, s->frame_buf, num_strips, size);
757
-            av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12li, %i B\n", num_strips, score, best_size);
828
+#ifdef CINEPAK_REPORT_SERR
829
+            best_serr = serr;
830
+#endif
831
+            best_size = size + write_cvid_header(s, s->frame_buf, num_strips, size, isakeyframe);
832
+            //av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12lli, %i B\n", num_strips, (long long int)score, best_size);
833
+#ifdef CINEPAK_REPORT_SERR
834
+            av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12lli, %i B\n", num_strips, (long long int)serr, best_size);
835
+#endif
758 836
 
759
-            FFSWAP(AVFrame, s->best_frame, s->scratch_frame);
837
+            FFSWAP(AVFrame *, s->best_frame, s->scratch_frame);
838
+            memcpy(buf, s->frame_buf, best_size);
839
+            best_nstrips = num_strips;
760 840
         }
841
+// avoid trying too many strip numbers without a real reason
842
+// (this makes the processing of the very first frame faster)
843
+        if(num_strips - best_nstrips > 4)
844
+            break;
761 845
     }
762 846
 
763
-    memcpy(buf, s->frame_buf, best_size);
847
+// let the number of strips slowly adapt to the changes in the contents,
848
+// compared to full bruteforcing every time this will occasionally lead
849
+// to some r/d performance loss but makes encoding up to several times faster
850
+#ifdef CINEPAK_AGGRESSIVE_STRIP_NUMBER_ADAPTIVITY
851
+    s->max_strips = best_nstrips + 4;
852
+    if(s->max_strips >= MAX_STRIPS)
853
+        s->max_strips = MAX_STRIPS;
854
+    s->min_strips = best_nstrips - 4;
855
+    if(s->min_strips < MIN_STRIPS)
856
+        s->min_strips = MIN_STRIPS;
857
+#else
858
+    if(best_nstrips == s->max_strips) { // let us try to step up
859
+        s->max_strips = best_nstrips + 1;
860
+        if(s->max_strips >= MAX_STRIPS)
861
+            s->max_strips = MAX_STRIPS;
862
+    } else { // try to step down
863
+        s->max_strips = best_nstrips;
864
+    }
865
+    s->min_strips = s->max_strips - 1;
866
+    if(s->min_strips < MIN_STRIPS)
867
+        s->min_strips = MIN_STRIPS;
868
+#endif
764 869
 
765 870
     return best_size;
766 871
 }
767 872
 
768
-static int cinepak_encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data)
873
+static int cinepak_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
874
+                                const AVFrame *frame, int *got_packet)
769 875
 {
770 876
     CinepakEncContext *s = avctx->priv_data;
771
-    AVFrame *frame = data;
772 877
     int ret;
773 878
 
774 879
     s->lambda = frame->quality ? frame->quality - 1 : 2 * FF_LAMBDA_SCALE;
775 880
 
776
-    frame->key_frame = s->curframe == 0;
777
-    frame->pict_type = frame->key_frame ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
881
+    if ((ret = ff_alloc_packet(pkt, s->frame_buf_size)) < 0)
882
+        return ret;
883
+    ret = rd_frame(s, frame, (s->curframe == 0), pkt->data, s->frame_buf_size);
884
+    pkt->size = ret;
885
+    if (s->curframe == 0)
886
+        pkt->flags |= AV_PKT_FLAG_KEY;
887
+    *got_packet = 1;
778 888
 
779
-    ret = rd_frame(s, frame, buf, buf_size);
780
-
781
-    FFSWAP(AVFrame, s->last_frame, s->best_frame);
889
+    FFSWAP(AVFrame *, s->last_frame, s->best_frame);
782 890
 
783 891
     if (++s->curframe >= s->keyint)
784 892
         s->curframe = 0;
785 893
 
786
-    return ret;
894
+    return 0;
787 895
 }
788 896
 
789 897
 static av_cold int cinepak_encode_end(AVCodecContext *avctx)
... ...
@@ -791,20 +1230,27 @@ static av_cold int cinepak_encode_end(AVCodecContext *avctx)
791 791
     CinepakEncContext *s = avctx->priv_data;
792 792
     int x;
793 793
 
794
-    av_free(s->codebook_input);
795
-    av_free(s->codebook_closest);
796
-    av_free(s->strip_buf);
797
-    av_free(s->frame_buf);
798
-    av_free(s->mb);
794
+    av_frame_free(&s->last_frame);
795
+    av_frame_free(&s->best_frame);
796
+    av_frame_free(&s->scratch_frame);
797
+    if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
798
+        av_frame_free(&s->input_frame);
799
+    av_freep(&s->codebook_input);
800
+    av_freep(&s->codebook_closest);
801
+    av_freep(&s->strip_buf);
802
+    av_freep(&s->frame_buf);
803
+    av_freep(&s->mb);
799 804
 #ifdef CINEPAKENC_DEBUG
800
-    av_free(s->best_mb);
805
+    av_freep(&s->best_mb);
801 806
 #endif
802 807
 
803
-    for(x = 0; x < 3; x++)
804
-        av_free(s->pict_bufs[x]);
808
+    for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
809
+        av_freep(&s->pict_bufs[x]);
805 810
 
811
+#ifdef CINEPAKENC_DEBUG
806 812
     av_log(avctx, AV_LOG_INFO, "strip coding stats: %i V1 mode, %i V4 mode, %i MC mode (%i V1 encs, %i V4 encs, %i skips)\n",
807 813
         s->num_v1_mode, s->num_v4_mode, s->num_mc_mode, s->num_v1_encs, s->num_v4_encs, s->num_skips);
814
+#endif
808 815
 
809 816
     return 0;
810 817
 }