Browse code

DNxHD (SMPTE VC-3) encoder

Originally committed as revision 10682 to svn://svn.ffmpeg.org/ffmpeg/trunk

Baptiste Coudurier authored on 2007/10/08 20:27:18
Showing 8 changed files
... ...
@@ -97,6 +97,7 @@ version <next>
97 97
 - Monkey's Audio demuxer and decoder
98 98
 - additional SPARC (VIS) optimizations
99 99
 - AMV audio and video decoder
100
+- DNxHD encoder
100 101
 
101 102
 version 0.4.9-pre1:
102 103
 
... ...
@@ -118,7 +118,7 @@ Codecs:
118 118
   cook.c, cookdata.h                    Benjamin Larsson
119 119
   cscd.c                                Reimar Doeffinger
120 120
   dca.c                                 Kostya Shishkov
121
-  dnxhddec.c                            Baptiste Coudurier
121
+  dnxhd*                                Baptiste Coudurier
122 122
   dpcm.c                                Mike Melanson
123 123
   dxa.c                                 Kostya Shishkov
124 124
   dv.c                                  Roman Shaposhnik
... ...
@@ -226,7 +226,7 @@ following image formats are supported:
226 226
 @item Cin Video              @tab     @tab  X @tab Codec used in Delphine Software games.
227 227
 @item Tiertex Seq Video      @tab     @tab  X @tab Codec used in DOS CDROM FlashBack game.
228 228
 @item DXA Video              @tab     @tab  X @tab Codec originally used in Feeble Files game.
229
-@item AVID DNxHD             @tab     @tab  X @tab aka SMPTE VC3
229
+@item AVID DNxHD             @tab   X @tab  X @tab aka SMPTE VC3
230 230
 @item C93 Video              @tab     @tab  X @tab Codec used in Cyberia game.
231 231
 @item THP                    @tab     @tab  X @tab Used on the Nintendo GameCube.
232 232
 @item Bethsoft VID           @tab     @tab  X @tab Used in some games from Bethesda Softworks.
... ...
@@ -56,6 +56,7 @@ OBJS-$(CONFIG_CSCD_DECODER)            += cscd.o
56 56
 OBJS-$(CONFIG_CYUV_DECODER)            += cyuv.o
57 57
 OBJS-$(CONFIG_DCA_DECODER)             += dca.o
58 58
 OBJS-$(CONFIG_DNXHD_DECODER)           += dnxhddec.o dnxhddata.o
59
+OBJS-$(CONFIG_DNXHD_ENCODER)           += dnxhdenc.o dnxhddata.o
59 60
 OBJS-$(CONFIG_DSICINVIDEO_DECODER)     += dsicinav.o
60 61
 OBJS-$(CONFIG_DSICINAUDIO_DECODER)     += dsicinav.o
61 62
 OBJS-$(CONFIG_DVBSUB_DECODER)          += dvbsubdec.o
... ...
@@ -73,7 +73,7 @@ void avcodec_register_all(void)
73 73
     REGISTER_DECODER (CLJR, cljr);
74 74
     REGISTER_DECODER (CSCD, cscd);
75 75
     REGISTER_DECODER (CYUV, cyuv);
76
-    REGISTER_DECODER (DNXHD, dnxhd);
76
+    REGISTER_ENCDEC  (DNXHD, dnxhd);
77 77
     REGISTER_DECODER (DSICINVIDEO, dsicinvideo);
78 78
     REGISTER_ENCDEC  (DVVIDEO, dvvideo);
79 79
     REGISTER_DECODER (DXA, dxa);
... ...
@@ -33,8 +33,8 @@
33 33
 #define AV_STRINGIFY(s)         AV_TOSTRING(s)
34 34
 #define AV_TOSTRING(s) #s
35 35
 
36
-#define LIBAVCODEC_VERSION_INT  ((51<<16)+(44<<8)+0)
37
-#define LIBAVCODEC_VERSION      51.44.0
36
+#define LIBAVCODEC_VERSION_INT  ((51<<16)+(45<<8)+0)
37
+#define LIBAVCODEC_VERSION      51.45.0
38 38
 #define LIBAVCODEC_BUILD        LIBAVCODEC_VERSION_INT
39 39
 
40 40
 #define LIBAVCODEC_IDENT        "Lavc" AV_STRINGIFY(LIBAVCODEC_VERSION)
41 41
new file mode 100644
... ...
@@ -0,0 +1,845 @@
0
+/*
1
+ * VC3/DNxHD encoder
2
+ * Copyright (c) 2007 Baptiste Coudurier <baptiste dot coudurier at smartjog dot com>
3
+ *
4
+ * VC-3 encoder funded by the British Broadcasting Corporation
5
+ *
6
+ * This file is part of FFmpeg.
7
+ *
8
+ * FFmpeg is free software; you can redistribute it and/or
9
+ * modify it under the terms of the GNU Lesser General Public
10
+ * License as published by the Free Software Foundation; either
11
+ * version 2.1 of the License, or (at your option) any later version.
12
+ *
13
+ * FFmpeg is distributed in the hope that it will be useful,
14
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
+ * Lesser General Public License for more details.
17
+ *
18
+ * You should have received a copy of the GNU Lesser General Public
19
+ * License along with FFmpeg; if not, write to the Free Software
20
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
+ */
22
+
23
+//#define DEBUG
24
+#define RC_VARIANCE 1 // use variance or ssd for fast rc
25
+
26
+#include "avcodec.h"
27
+#include "dsputil.h"
28
+#include "mpegvideo.h"
29
+#include "dnxhddata.h"
30
+
31
+typedef struct {
32
+    uint16_t mb;    ///< macroblock index (mb_y * mb_width + mb_x)
33
+    int value;      ///< sort key compared by dnxhd_rc_cmp (larger values sort first)
34
+} RCCMPEntry;
35
+
36
+typedef struct {
37
+    int ssd;    ///< squared error of one macroblock at one qscale (filled by dnxhd_calc_bits_thread)
38
+    int bits;   ///< bits needed to code that macroblock at that qscale
39
+} RCEntry;
40
+
41
+int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
42
+
43
+typedef struct DNXHDEncContext {
44
+    MpegEncContext m; ///< Used for quantization dsp functions; must stay first, the context is cast to MpegEncContext* when calling dct_quantize
45
+
46
+    AVFrame frame; ///< exposed as avctx->coded_frame; also holds the input plane pointers and strides
47
+    int cid; ///< compression ID chosen from bit rate and interlacing in dnxhd_encode_init
48
+    const CIDEntry *cid_table; ///< entry of ff_dnxhd_cid_table for cid
49
+    uint8_t *msip; ///< Macroblock Scan Indices Payload
50
+    uint32_t *slice_size; ///< padded size in bytes of each macroblock row
51
+
52
+    struct DNXHDEncContext *thread[MAX_THREADS]; ///< per-thread copies of this context; thread[0] is the context itself
53
+
54
+    unsigned dct_y_offset; ///< byte offset from the upper to the lower luma blocks (8 luma lines)
55
+    unsigned dct_uv_offset; ///< byte offset from the upper to the lower chroma blocks (8 chroma lines)
56
+    int interlaced; ///< set when CODEC_FLAG_INTERLACED_DCT is requested
57
+    int cur_field; ///< field being encoded when interlaced
58
+
59
+    DECLARE_ALIGNED_16(DCTELEM, blocks[8][64]); ///< the 8 blocks of the current macroblock
60
+
61
+    int      (*qmatrix_c)     [64];
62
+    int      (*qmatrix_l)     [64];
63
+    uint16_t (*qmatrix_l16)[2][64];
64
+    uint16_t (*qmatrix_c16)[2][64];
65
+
66
+    unsigned frame_bits; ///< bit budget for the slice data of one coding unit
67
+    uint8_t *src[3]; ///< plane pointers of the field/frame currently being encoded
68
+
69
+    uint16_t *table_vlc_codes; ///< AC codes indexed by level | run_flag<<7 | index_flag<<8
70
+    uint8_t  *table_vlc_bits; ///< lengths of the codes in table_vlc_codes
71
+    uint16_t *table_run_codes; ///< run codes indexed by run length
72
+    uint8_t  *table_run_bits; ///< lengths of the codes in table_run_codes
73
+
74
+    /** Rate control */
75
+    unsigned slice_bits;
76
+    unsigned qscale; ///< current frame-level qscale; carried over as the starting point for the next frame
77
+    unsigned lambda; ///< current RD lambda, fixed point with LAMBDA_FRAC_BITS fractional bits
78
+
79
+    unsigned thread_size;
80
+
81
+    uint16_t *mb_bits; ///< bits chosen for each macroblock
82
+    uint8_t  *mb_qscale; ///< qscale chosen for each macroblock
83
+
84
+    RCCMPEntry *mb_cmp; ///< per-macroblock sort keys, only allocated when mb_decision is not FF_MB_DECISION_RD
85
+    RCEntry   (*mb_rc)[8160]; ///< rate/distortion statistics indexed as [qscale][macroblock]
86
+} DNXHDEncContext;
87
+
88
+#define LAMBDA_FRAC_BITS 10 ///< number of fractional bits of the fixed-point RD lambda
89
+
90
+static int dnxhd_init_vlc(DNXHDEncContext *ctx)
91
+{
92
+    int i;
93
+
94
+    CHECKED_ALLOCZ(ctx->table_vlc_codes, 449*2);
95
+    CHECKED_ALLOCZ(ctx->table_vlc_bits,    449);
96
+    CHECKED_ALLOCZ(ctx->table_run_codes,  63*2);
97
+    CHECKED_ALLOCZ(ctx->table_run_bits,     63);
98
+
99
+    for (i = 0; i < 257; i++) {
100
+        int level = ctx->cid_table->ac_level[i] +
101
+            (ctx->cid_table->ac_run_flag[i] << 7) + (ctx->cid_table->ac_index_flag[i] << 8);
102
+        assert(level < 449);
103
+        if (ctx->cid_table->ac_level[i] == 64 && ctx->cid_table->ac_index_flag[i])
104
+            level -= 64; // use 0+(1<<8) level
105
+        ctx->table_vlc_codes[level] = ctx->cid_table->ac_codes[i];
106
+        ctx->table_vlc_bits [level] = ctx->cid_table->ac_bits[i];
107
+    }
108
+    for (i = 0; i < 62; i++) {
109
+        int run = ctx->cid_table->run[i];
110
+        assert(run < 63);
111
+        ctx->table_run_codes[run] = ctx->cid_table->run_codes[i];
112
+        ctx->table_run_bits [run] = ctx->cid_table->run_bits[i];
113
+    }
114
+    return 0;
115
+ fail:
116
+    return -1;
117
+}
118
+
119
+static int dnxhd_init_qmat(DNXHDEncContext *ctx, int lbias, int cbias)
120
+{
121
+    // init first elem to 1 to avoid div by 0 in convert_matrix
122
+    uint16_t weight_matrix[64] = {1,}; // convert_matrix needs uint16_t*
123
+    int qscale, i;
124
+
125
+    CHECKED_ALLOCZ(ctx->qmatrix_l,   (ctx->m.avctx->qmax+1) * 64 * sizeof(int));
126
+    CHECKED_ALLOCZ(ctx->qmatrix_c,   (ctx->m.avctx->qmax+1) * 64 * sizeof(int));
127
+    CHECKED_ALLOCZ(ctx->qmatrix_l16, (ctx->m.avctx->qmax+1) * 64 * 2 * sizeof(uint16_t));
128
+    CHECKED_ALLOCZ(ctx->qmatrix_c16, (ctx->m.avctx->qmax+1) * 64 * 2 * sizeof(uint16_t));
129
+
130
+    for (i = 1; i < 64; i++) {
131
+        int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
132
+        weight_matrix[j] = ctx->cid_table->luma_weigth[i];
133
+    }
134
+    ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_l, ctx->qmatrix_l16, weight_matrix,
135
+                      ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);
136
+    for (i = 1; i < 64; i++) {
137
+        int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
138
+        weight_matrix[j] = ctx->cid_table->chroma_weigth[i];
139
+    }
140
+    ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_c, ctx->qmatrix_c16, weight_matrix,
141
+                      ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);
142
+    for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) {
143
+        for (i = 0; i < 64; i++) {
144
+            ctx->qmatrix_l  [qscale]   [i] <<= 2; ctx->qmatrix_c  [qscale]   [i] <<= 2;
145
+            ctx->qmatrix_l16[qscale][0][i] <<= 2; ctx->qmatrix_l16[qscale][1][i] <<= 2;
146
+            ctx->qmatrix_c16[qscale][0][i] <<= 2; ctx->qmatrix_c16[qscale][1][i] <<= 2;
147
+        }
148
+    }
149
+    return 0;
150
+ fail:
151
+    return -1;
152
+}
153
+
154
+static int dnxhd_init_rc(DNXHDEncContext *ctx)
155
+{
156
+    CHECKED_ALLOCZ(ctx->mb_rc, 8160*ctx->m.avctx->qmax*sizeof(RCEntry));
157
+    if (ctx->m.avctx->mb_decision != FF_MB_DECISION_RD)
158
+        CHECKED_ALLOCZ(ctx->mb_cmp, ctx->m.mb_num*sizeof(RCCMPEntry));
159
+
160
+    ctx->frame_bits = (ctx->cid_table->coding_unit_size - 640 - 4) * 8;
161
+    ctx->qscale = 1;
162
+    ctx->lambda = 2<<LAMBDA_FRAC_BITS; // qscale 2
163
+    return 0;
164
+ fail:
165
+    return -1;
166
+}
167
+
168
+static int dnxhd_encode_init(AVCodecContext *avctx)
169
+{
170
+    DNXHDEncContext *ctx = avctx->priv_data;
171
+    int i, index;
172
+
173
+    if (avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
174
+        if      (avctx->bit_rate == 120000000)
175
+            ctx->cid = 1242;
176
+        else if (avctx->bit_rate == 185000000)
177
+            ctx->cid = 1243;
178
+    } else {
179
+        if      (avctx->bit_rate == 120000000)
180
+            ctx->cid = 1237;
181
+        else if (avctx->bit_rate == 185000000)
182
+            ctx->cid = 1238;
183
+    }
184
+    if (!ctx->cid || avctx->width != 1920 || avctx->height != 1080 || avctx->pix_fmt != PIX_FMT_YUV422P) {
185
+        av_log(avctx, AV_LOG_ERROR, "video parameters incompatible with DNxHD\n");
186
+        return -1;
187
+    }
188
+
189
+    index = ff_dnxhd_get_cid_table(ctx->cid);
190
+    ctx->cid_table = &ff_dnxhd_cid_table[index];
191
+
192
+    ctx->m.avctx = avctx;
193
+    ctx->m.mb_intra = 1;
194
+    ctx->m.h263_aic = 1;
195
+
196
+    dsputil_init(&ctx->m.dsp, avctx);
197
+    ff_dct_common_init(&ctx->m);
198
+    if (!ctx->m.dct_quantize)
199
+        ctx->m.dct_quantize = dct_quantize_c;
200
+
201
+    ctx->m.mb_height = (avctx->height + 15) / 16;
202
+    ctx->m.mb_width  = (avctx->width  + 15) / 16;
203
+
204
+    if (avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
205
+        ctx->interlaced = 1;
206
+        ctx->m.mb_height /= 2;
207
+    }
208
+
209
+    ctx->m.mb_num = ctx->m.mb_height * ctx->m.mb_width;
210
+
211
+    if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
212
+        ctx->m.intra_quant_bias = avctx->intra_quant_bias;
213
+    if (dnxhd_init_qmat(ctx, ctx->m.intra_quant_bias, 0) < 0) // XXX tune lbias/cbias
214
+        return -1;
215
+
216
+    if (dnxhd_init_vlc(ctx) < 0)
217
+        return -1;
218
+    if (dnxhd_init_rc(ctx) < 0)
219
+        return -1;
220
+
221
+    CHECKED_ALLOCZ(ctx->slice_size, ctx->m.mb_height*sizeof(uint32_t));
222
+    CHECKED_ALLOCZ(ctx->mb_bits,    ctx->m.mb_num   *sizeof(uint16_t));
223
+    CHECKED_ALLOCZ(ctx->mb_qscale,  ctx->m.mb_num   *sizeof(uint8_t));
224
+
225
+    ctx->frame.key_frame = 1;
226
+    ctx->frame.pict_type = FF_I_TYPE;
227
+    ctx->m.avctx->coded_frame = &ctx->frame;
228
+
229
+    if (avctx->thread_count > MAX_THREADS || (avctx->thread_count > ctx->m.mb_height)) {
230
+        av_log(avctx, AV_LOG_ERROR, "too many threads\n");
231
+        return -1;
232
+    }
233
+
234
+    ctx->thread[0] = ctx;
235
+    for (i = 1; i < avctx->thread_count; i++) {
236
+        ctx->thread[i] =  av_malloc(sizeof(DNXHDEncContext));
237
+        memcpy(ctx->thread[i], ctx, sizeof(DNXHDEncContext));
238
+    }
239
+
240
+    for (i = 0; i < avctx->thread_count; i++) {
241
+        ctx->thread[i]->m.start_mb_y = (ctx->m.mb_height*(i  ) + avctx->thread_count/2) / avctx->thread_count;
242
+        ctx->thread[i]->m.end_mb_y   = (ctx->m.mb_height*(i+1) + avctx->thread_count/2) / avctx->thread_count;
243
+    }
244
+
245
+    return 0;
246
+ fail: //for CHECKED_ALLOCZ
247
+    return -1;
248
+}
249
+
250
+static int dnxhd_write_header(AVCodecContext *avctx, uint8_t *buf)
251
+{
252
+    DNXHDEncContext *ctx = avctx->priv_data;
253
+    const uint8_t header_prefix[5] = { 0x00,0x00,0x02,0x80,0x01 };
254
+
255
+    memcpy(buf, header_prefix, 5);
256
+    buf[5] = ctx->interlaced ? ctx->cur_field+2 : 0x01;
257
+    buf[6] = 0x80; // crc flag off
258
+    buf[7] = 0xa0; // reserved
259
+    AV_WB16(buf + 0x18, avctx->height); // ALPF
260
+    AV_WB16(buf + 0x1a, avctx->width);  // SPL
261
+    AV_WB16(buf + 0x1d, avctx->height); // NAL
262
+
263
+    buf[0x21] = 0x38; // FIXME 8 bit per comp
264
+    buf[0x22] = 0x88 + (ctx->frame.interlaced_frame<<2);
265
+    AV_WB32(buf + 0x28, ctx->cid); // CID
266
+    buf[0x2c] = ctx->interlaced ? 0 : 0x80;
267
+
268
+    buf[0x5f] = 0x01; // UDL
269
+
270
+    buf[0x167] = 0x02; // reserved
271
+    AV_WB16(buf + 0x16a, ctx->m.mb_height * 4 + 4); // MSIPS
272
+    buf[0x16d] = ctx->m.mb_height; // Ns
273
+    buf[0x16f] = 0x10; // reserved
274
+
275
+    ctx->msip = buf + 0x170;
276
+    return 0;
277
+}
278
+
279
+static av_always_inline void dnxhd_encode_dc(DNXHDEncContext *ctx, int diff)
280
+{
281
+    int nbits;
282
+    if (diff < 0) {
283
+        nbits = av_log2_16bit(-2*diff);
284
+        diff--;
285
+    } else {
286
+        nbits = av_log2_16bit(2*diff);
287
+    }
288
+    put_bits(&ctx->m.pb, ctx->cid_table->dc_bits[nbits] + nbits,
289
+             (ctx->cid_table->dc_codes[nbits]<<nbits) + (diff & ((1 << nbits) - 1)));
290
+}
291
+
292
+static av_always_inline void dnxhd_encode_block(DNXHDEncContext *ctx, DCTELEM *block, int last_index, int n)
293
+{
294
+    int last_non_zero = 0;
295
+    int offset = 0;
296
+    int slevel, i, j;
297
+
298
+    dnxhd_encode_dc(ctx, block[0] - ctx->m.last_dc[n]);
299
+    ctx->m.last_dc[n] = block[0];
300
+
301
+    for (i = 1; i <= last_index; i++) {
302
+        j = ctx->m.intra_scantable.permutated[i];
303
+        slevel = block[j];
304
+        if (slevel) {
305
+            int run_level = i - last_non_zero - 1;
306
+            int sign;
307
+            MASK_ABS(sign, slevel);
308
+            if (slevel > 64) {
309
+                offset = (slevel-1) >> 6;
310
+                slevel = 256 | (slevel & 63); // level 64 is treated as 0
311
+            }
312
+            if (run_level)
313
+                slevel |= 128;
314
+            put_bits(&ctx->m.pb, ctx->table_vlc_bits[slevel]+1, (ctx->table_vlc_codes[slevel]<<1)|(sign&1));
315
+            if (offset) {
316
+                put_bits(&ctx->m.pb, 4, offset);
317
+                offset = 0;
318
+            }
319
+            if (run_level)
320
+                put_bits(&ctx->m.pb, ctx->table_run_bits[run_level], ctx->table_run_codes[run_level]);
321
+            last_non_zero = i;
322
+        }
323
+    }
324
+    put_bits(&ctx->m.pb, ctx->table_vlc_bits[0], ctx->table_vlc_codes[0]); // EOB
325
+}
326
+
327
+static av_always_inline void dnxhd_unquantize_c(DNXHDEncContext *ctx, DCTELEM *block, int n, int qscale, int last_index)
328
+{
329
+    const uint8_t *weigth_matrix;
330
+    int level;
331
+    int i;
332
+
333
+    weigth_matrix = (n&2) ? ctx->cid_table->chroma_weigth : ctx->cid_table->luma_weigth;
334
+
335
+    for (i = 1; i <= last_index; i++) {
336
+        int j = ctx->m.intra_scantable.permutated[i];
337
+        level = block[j];
338
+        if (level) {
339
+            if (level < 0) {
340
+                level = (1-2*level) * qscale * weigth_matrix[i];
341
+                if (weigth_matrix[i] != 32)
342
+                    level += 32;
343
+                level >>= 6;
344
+                level = -level;
345
+            } else {
346
+                level = (2*level+1) * qscale * weigth_matrix[i];
347
+                if (weigth_matrix[i] != 32)
348
+                    level += 32;
349
+                level >>= 6;
350
+            }
351
+            block[j] = level;
352
+        }
353
+    }
354
+}
355
+
356
+static av_always_inline int dnxhd_ssd_block(DCTELEM *qblock, DCTELEM *block)
357
+{
358
+    int score = 0;
359
+    int i;
360
+    for (i = 0; i < 64; i++)
361
+        score += (block[i]-qblock[i])*(block[i]-qblock[i]);
362
+    return score;
363
+}
364
+
365
+static av_always_inline int dnxhd_calc_ac_bits(DNXHDEncContext *ctx, DCTELEM *block, int last_index)
366
+{
367
+    int last_non_zero = 0;
368
+    int bits = 0;
369
+    int i, j, level;
370
+    for (i = 1; i <= last_index; i++) {
371
+        j = ctx->m.intra_scantable.permutated[i];
372
+        level = block[j];
373
+        if (level) {
374
+            int run_level = i - last_non_zero - 1;
375
+            level = FFABS(level);
376
+            if (level > 64) {
377
+                level = 256 | (level & 63); // level 64 is treated as 0
378
+                bits += 4;
379
+            }
380
+            level |= (!!run_level)<<7;
381
+            bits += ctx->table_vlc_bits[level]+1 + ctx->table_run_bits[run_level];
382
+            last_non_zero = i;
383
+        }
384
+    }
385
+    return bits;
386
+}
387
+
388
+static av_always_inline void dnxhd_get_pixels_4x8(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
389
+{
390
+    int i;
391
+    for (i = 0; i < 4; i++) {
392
+        block[0] = pixels[0];
393
+        block[1] = pixels[1];
394
+        block[2] = pixels[2];
395
+        block[3] = pixels[3];
396
+        block[4] = pixels[4];
397
+        block[5] = pixels[5];
398
+        block[6] = pixels[6];
399
+        block[7] = pixels[7];
400
+        pixels += line_size;
401
+        block += 8;
402
+    }
403
+    memcpy(block   , block- 8, sizeof(*block)*8);
404
+    memcpy(block+ 8, block-16, sizeof(*block)*8);
405
+    memcpy(block+16, block-24, sizeof(*block)*8);
406
+    memcpy(block+24, block-32, sizeof(*block)*8);
407
+}
408
+
409
+static av_always_inline void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y)
410
+{
411
+    const uint8_t *ptr_y = ctx->thread[0]->src[0] + ((mb_y << 4) * ctx->m.linesize)   + (mb_x << 4);
412
+    const uint8_t *ptr_u = ctx->thread[0]->src[1] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << 3);
413
+    const uint8_t *ptr_v = ctx->thread[0]->src[2] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << 3);
414
+    DSPContext *dsp = &ctx->m.dsp;
415
+
416
+    dsp->get_pixels(ctx->blocks[0], ptr_y    , ctx->m.linesize);
417
+    dsp->get_pixels(ctx->blocks[1], ptr_y + 8, ctx->m.linesize);
418
+    dsp->get_pixels(ctx->blocks[2], ptr_u    , ctx->m.uvlinesize);
419
+    dsp->get_pixels(ctx->blocks[3], ptr_v    , ctx->m.uvlinesize);
420
+
421
+    if (mb_y+1 == ctx->m.mb_height) {
422
+        if (ctx->interlaced) {
423
+            dnxhd_get_pixels_4x8(ctx->blocks[4], ptr_y + ctx->dct_y_offset    , ctx->m.linesize);
424
+            dnxhd_get_pixels_4x8(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize);
425
+            dnxhd_get_pixels_4x8(ctx->blocks[6], ptr_u + ctx->dct_uv_offset   , ctx->m.uvlinesize);
426
+            dnxhd_get_pixels_4x8(ctx->blocks[7], ptr_v + ctx->dct_uv_offset   , ctx->m.uvlinesize);
427
+        } else
428
+            memset(ctx->blocks[4], 0, 4*64*sizeof(DCTELEM));
429
+    } else {
430
+        dsp->get_pixels(ctx->blocks[4], ptr_y + ctx->dct_y_offset    , ctx->m.linesize);
431
+        dsp->get_pixels(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize);
432
+        dsp->get_pixels(ctx->blocks[6], ptr_u + ctx->dct_uv_offset   , ctx->m.uvlinesize);
433
+        dsp->get_pixels(ctx->blocks[7], ptr_v + ctx->dct_uv_offset   , ctx->m.uvlinesize);
434
+    }
435
+}
436
+
437
+static av_always_inline int dnxhd_switch_matrix(DNXHDEncContext *ctx, int i)
438
+{
439
+    if (i&2) {
440
+        ctx->m.q_intra_matrix16 = ctx->qmatrix_c16;
441
+        ctx->m.q_intra_matrix   = ctx->qmatrix_c;
442
+        return 1 + (i&1);
443
+    } else {
444
+        ctx->m.q_intra_matrix16 = ctx->qmatrix_l16;
445
+        ctx->m.q_intra_matrix   = ctx->qmatrix_l;
446
+        return 0;
447
+    }
448
+}
449
+
450
+static int dnxhd_calc_bits_thread(AVCodecContext *avctx, void *arg)
451
+{
452
+    DNXHDEncContext *ctx = arg;
453
+    int mb_y, mb_x;
454
+    int qscale = ctx->thread[0]->qscale;
455
+
456
+    for (mb_y = ctx->m.start_mb_y; mb_y < ctx->m.end_mb_y; mb_y++) {
457
+        ctx->m.last_dc[0] =
458
+        ctx->m.last_dc[1] =
459
+        ctx->m.last_dc[2] = 1024;
460
+
461
+        for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
462
+            unsigned mb = mb_y * ctx->m.mb_width + mb_x;
463
+            int ssd     = 0;
464
+            int ac_bits = 0;
465
+            int dc_bits = 0;
466
+            int i;
467
+
468
+            dnxhd_get_blocks(ctx, mb_x, mb_y);
469
+
470
+            for (i = 0; i < 8; i++) {
471
+                DECLARE_ALIGNED_16(DCTELEM, block[64]);
472
+                DCTELEM *src_block = ctx->blocks[i];
473
+                int overflow, nbits, diff, last_index;
474
+                int n = dnxhd_switch_matrix(ctx, i);
475
+
476
+                memcpy(block, src_block, sizeof(block));
477
+                last_index = ctx->m.dct_quantize((MpegEncContext*)ctx, block, i, qscale, &overflow);
478
+                ac_bits += dnxhd_calc_ac_bits(ctx, block, last_index);
479
+
480
+                diff = block[0] - ctx->m.last_dc[n];
481
+                if (diff < 0) nbits = av_log2_16bit(-2*diff);
482
+                else          nbits = av_log2_16bit( 2*diff);
483
+                dc_bits += ctx->cid_table->dc_bits[nbits] + nbits;
484
+
485
+                ctx->m.last_dc[n] = block[0];
486
+
487
+                if (avctx->mb_decision == FF_MB_DECISION_RD || !RC_VARIANCE) {
488
+                    dnxhd_unquantize_c(ctx, block, i, qscale, last_index);
489
+                    ctx->m.dsp.idct(block);
490
+                    ssd += dnxhd_ssd_block(block, src_block);
491
+                }
492
+            }
493
+            ctx->mb_rc[qscale][mb].ssd = ssd;
494
+            ctx->mb_rc[qscale][mb].bits = ac_bits+dc_bits+12+8*ctx->table_vlc_bits[0];
495
+        }
496
+    }
497
+    return 0;
498
+}
499
+
500
+static int dnxhd_encode_thread(AVCodecContext *avctx, void *arg)
501
+{
502
+    DNXHDEncContext *ctx = arg;
503
+    int mb_y, mb_x;
504
+
505
+    for (mb_y = ctx->m.start_mb_y; mb_y < ctx->m.end_mb_y; mb_y++) {
506
+        ctx->m.last_dc[0] =
507
+        ctx->m.last_dc[1] =
508
+        ctx->m.last_dc[2] = 1024;
509
+        for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
510
+            unsigned mb = mb_y * ctx->m.mb_width + mb_x;
511
+            int qscale = ctx->mb_qscale[mb];
512
+            int i;
513
+
514
+            put_bits(&ctx->m.pb, 12, qscale<<1);
515
+
516
+            dnxhd_get_blocks(ctx, mb_x, mb_y);
517
+
518
+            for (i = 0; i < 8; i++) {
519
+                DCTELEM *block = ctx->blocks[i];
520
+                int last_index, overflow;
521
+                int n = dnxhd_switch_matrix(ctx, i);
522
+                last_index = ctx->m.dct_quantize((MpegEncContext*)ctx, block, i, qscale, &overflow);
523
+                dnxhd_encode_block(ctx, block, last_index, n);
524
+            }
525
+        }
526
+        if (put_bits_count(&ctx->m.pb)&31)
527
+            put_bits(&ctx->m.pb, 32-(put_bits_count(&ctx->m.pb)&31), 0);
528
+    }
529
+    flush_put_bits(&ctx->m.pb);
530
+    return 0;
531
+}
532
+
533
+static void dnxhd_setup_threads_slices(DNXHDEncContext *ctx, uint8_t *buf)
534
+{
535
+    int mb_y, mb_x;
536
+    int i, offset = 0;
537
+    for (i = 0; i < ctx->m.avctx->thread_count; i++) {
538
+        int thread_size = 0;
539
+        for (mb_y = ctx->thread[i]->m.start_mb_y; mb_y < ctx->thread[i]->m.end_mb_y; mb_y++) {
540
+            ctx->slice_size[mb_y] = 0;
541
+            for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
542
+                unsigned mb = mb_y * ctx->m.mb_width + mb_x;
543
+                ctx->slice_size[mb_y] += ctx->mb_bits[mb];
544
+            }
545
+            ctx->slice_size[mb_y] = (ctx->slice_size[mb_y]+31)&~31;
546
+            ctx->slice_size[mb_y] >>= 3;
547
+            thread_size += ctx->slice_size[mb_y];
548
+        }
549
+        init_put_bits(&ctx->thread[i]->m.pb, buf + 640 + offset, thread_size);
550
+        offset += thread_size;
551
+    }
552
+}
553
+
554
+static int dnxhd_mb_var_thread(AVCodecContext *avctx, void *arg)
555
+{
556
+    DNXHDEncContext *ctx = arg;
557
+    int mb_y, mb_x;
558
+    for (mb_y = ctx->m.start_mb_y; mb_y < ctx->m.end_mb_y; mb_y++) {
559
+        for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
560
+            unsigned mb  = mb_y * ctx->m.mb_width + mb_x;
561
+            uint8_t *pix = ctx->thread[0]->src[0] + ((mb_y<<4) * ctx->m.linesize) + (mb_x<<4);
562
+            int sum      = ctx->m.dsp.pix_sum(pix, ctx->m.linesize);
563
+            int varc     = (ctx->m.dsp.pix_norm1(pix, ctx->m.linesize) - (((unsigned)(sum*sum))>>8)+128)>>8;
564
+            ctx->mb_cmp[mb].value = varc;
565
+            ctx->mb_cmp[mb].mb = mb;
566
+        }
567
+    }
568
+    return 0;
569
+}
570
+
571
+static int dnxhd_encode_rdo(AVCodecContext *avctx, DNXHDEncContext *ctx)
572
+{
573
+    unsigned lambda, up_lambda, down_lambda;
574
+    int x, y, q;
575
+
576
+    for (q = 1; q < avctx->qmax; q++) {
577
+        ctx->qscale = q;
578
+        avctx->execute(avctx, dnxhd_calc_bits_thread, (void**)&ctx->thread[0], NULL, avctx->thread_count);
579
+    }
580
+    up_lambda = avctx->qmax<<LAMBDA_FRAC_BITS;
581
+    down_lambda = 1; // higher ?
582
+    lambda = ctx->lambda;
583
+
584
+    for (;;) {
585
+        int bits = 0;
586
+        int end = 0;
587
+        if (lambda == up_lambda) {
588
+            lambda--;
589
+            end = 1; // need to set final qscales/bits
590
+        }
591
+        if (lambda == down_lambda) {
592
+            lambda++;
593
+            end = 1;
594
+        }
595
+        for (y = 0; y < ctx->m.mb_height; y++) {
596
+            for (x = 0; x < ctx->m.mb_width; x++) {
597
+                unsigned min = UINT_MAX;
598
+                int qscale = 1;
599
+                int mb = y*ctx->m.mb_width+x;
600
+                for (q = 1; q < avctx->qmax; q++) {
601
+                    unsigned score = ctx->mb_rc[q][mb].bits*lambda+(ctx->mb_rc[q][mb].ssd<<LAMBDA_FRAC_BITS);
602
+                    if (score < min) {
603
+                        min = score;
604
+                        qscale = q;
605
+                    }
606
+                }
607
+                bits += ctx->mb_rc[qscale][mb].bits;
608
+                ctx->mb_qscale[mb] = qscale;
609
+                ctx->mb_bits[mb] = ctx->mb_rc[qscale][mb].bits;
610
+            }
611
+            bits = (bits+31)&~31; // padding
612
+            if (bits > ctx->frame_bits)
613
+                break;
614
+        }
615
+        //dprintf(ctx->m.avctx, "lambda %d, up %d, down %d, bits %d, frame %d\n", lambda, up_lambda, down_lambda, bits, ctx->frame_bits);
616
+        if (end) {
617
+            if (bits > ctx->frame_bits)
618
+                return -1;
619
+            break;
620
+        }
621
+        if (bits < ctx->frame_bits) {
622
+            up_lambda = lambda;
623
+            lambda = (down_lambda+lambda)>>1;
624
+        } else {
625
+            down_lambda = lambda;
626
+            lambda = (up_lambda+lambda)>>1;
627
+        }
628
+    }
629
+    ctx->lambda = lambda;
630
+    return 0;
631
+}
632
+
633
+static int dnxhd_find_qscale(DNXHDEncContext *ctx)
634
+{
635
+    int bits = 0;
636
+    int up_step = 1;
637
+    int down_step = 1;
638
+    int last_higher = 0;
639
+    int last_lower = INT_MAX;
640
+    int qscale;
641
+    int x, y;
642
+
643
+    qscale = ctx->qscale;
644
+    for (;;) {
645
+        bits = 0;
646
+        ctx->qscale = qscale;
647
+        // XXX avoid recalculating bits
648
+        ctx->m.avctx->execute(ctx->m.avctx, dnxhd_calc_bits_thread, (void**)&ctx->thread[0], NULL, ctx->m.avctx->thread_count);
649
+        for (y = 0; y < ctx->m.mb_height; y++) {
650
+            for (x = 0; x < ctx->m.mb_width; x++)
651
+                bits += ctx->mb_rc[qscale][y*ctx->m.mb_width+x].bits;
652
+            bits = (bits+31)&~31; // padding
653
+            if (bits > ctx->frame_bits)
654
+                break;
655
+        }
656
+        //dprintf(ctx->m.avctx, "%d, qscale %d, bits %d, frame %d, higher %d, lower %d\n",
657
+        //        ctx->m.avctx->frame_number, qscale, bits, ctx->frame_bits, last_higher, last_lower);
658
+        if (bits < ctx->frame_bits) {
659
+            if (qscale == 1)
660
+                break;
661
+            if (last_higher == qscale - 1) {
662
+                qscale = last_higher;
663
+                break;
664
+            }
665
+            last_lower = FFMIN(qscale, last_lower);
666
+            if (last_higher != 0)
667
+                qscale = (qscale+last_higher)>>1;
668
+            else
669
+                qscale -= down_step++;
670
+            if (qscale < 1)
671
+                qscale = 1;
672
+            up_step = 1;
673
+        } else {
674
+            if (last_lower == qscale + 1)
675
+                break;
676
+            last_higher = FFMAX(qscale, last_higher);
677
+            if (last_lower != INT_MAX)
678
+                qscale = (qscale+last_lower)>>1;
679
+            else
680
+                qscale += up_step++;
681
+            down_step = 1;
682
+            if (qscale >= ctx->m.avctx->qmax)
683
+                return -1;
684
+        }
685
+    }
686
+    //dprintf(ctx->m.avctx, "out qscale %d\n", qscale);
687
+    ctx->qscale = qscale;
688
+    return 0;
689
+}
690
+
691
+static int dnxhd_rc_cmp(const void *a, const void *b)
692
+{
693
+    return ((RCCMPEntry *)b)->value - ((RCCMPEntry *)a)->value;
694
+}
695
+
696
+static int dnxhd_encode_variance(AVCodecContext *avctx, DNXHDEncContext *ctx)
697
+{
698
+    int max_bits = 0;
699
+    int x, y;
700
+    if (dnxhd_find_qscale(ctx) < 0)
701
+        return -1;
702
+    for (y = 0; y < ctx->m.mb_height; y++) {
703
+        for (x = 0; x < ctx->m.mb_width; x++) {
704
+            int mb = y*ctx->m.mb_width+x;
705
+            int delta_bits;
706
+            ctx->mb_qscale[mb] = ctx->qscale;
707
+            ctx->mb_bits[mb] = ctx->mb_rc[ctx->qscale][mb].bits;
708
+            max_bits += ctx->mb_rc[ctx->qscale][mb].bits;
709
+            if (!RC_VARIANCE) {
710
+                delta_bits = ctx->mb_rc[ctx->qscale][mb].bits-ctx->mb_rc[ctx->qscale+1][mb].bits;
711
+                ctx->mb_cmp[mb].mb = mb;
712
+                ctx->mb_cmp[mb].value = delta_bits ?
713
+                    ((ctx->mb_rc[ctx->qscale][mb].ssd-ctx->mb_rc[ctx->qscale+1][mb].ssd)*100)/delta_bits
714
+                    : INT_MIN; //avoid increasing qscale
715
+            }
716
+        }
717
+        max_bits += 31; //worst padding
718
+    }
719
+    if (max_bits > ctx->frame_bits) {
720
+        if (RC_VARIANCE)
721
+            avctx->execute(avctx, dnxhd_mb_var_thread, (void**)&ctx->thread[0], NULL, avctx->thread_count);
722
+        qsort(ctx->mb_cmp, ctx->m.mb_num, sizeof(RCEntry), dnxhd_rc_cmp);
723
+        for (x = 0; x < ctx->m.mb_num && max_bits > ctx->frame_bits; x++) {
724
+            int mb = ctx->mb_cmp[x].mb;
725
+            max_bits -= ctx->mb_rc[ctx->qscale][mb].bits - ctx->mb_rc[ctx->qscale+1][mb].bits;
726
+            ctx->mb_qscale[mb] = ctx->qscale+1;
727
+            ctx->mb_bits[mb] = ctx->mb_rc[ctx->qscale+1][mb].bits;
728
+        }
729
+    }
730
+    return 0;
731
+}
732
+
733
+static void dnxhd_load_picture(DNXHDEncContext *ctx, AVFrame *frame)
734
+{
735
+    int i;
736
+
737
+    for (i = 0; i < 3; i++) {
738
+        ctx->frame.data[i]     = frame->data[i];
739
+        ctx->frame.linesize[i] = frame->linesize[i];
740
+    }
741
+
742
+    for (i = 0; i < ctx->m.avctx->thread_count; i++) {
743
+        ctx->thread[i]->m.linesize    = ctx->frame.linesize[0]<<ctx->interlaced;
744
+        ctx->thread[i]->m.uvlinesize  = ctx->frame.linesize[1]<<ctx->interlaced;
745
+        ctx->thread[i]->dct_y_offset  = ctx->m.linesize  *8;
746
+        ctx->thread[i]->dct_uv_offset = ctx->m.uvlinesize*8;
747
+    }
748
+
749
+    ctx->frame.interlaced_frame = frame->interlaced_frame;
750
+    ctx->cur_field = frame->interlaced_frame && !frame->top_field_first;
751
+}
752
+
753
+static int dnxhd_encode_picture(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data)
754
+{
755
+    DNXHDEncContext *ctx = avctx->priv_data;
756
+    int first_field = 1;
757
+    int offset, i, ret;
758
+
759
+    if (buf_size < ctx->cid_table->frame_size) {
760
+        av_log(avctx, AV_LOG_ERROR, "output buffer is too small to compress picture\n");
761
+        return -1;
762
+    }
763
+
764
+    dnxhd_load_picture(ctx, data);
765
+
766
+ encode_coding_unit:
767
+    for (i = 0; i < 3; i++) {
768
+        ctx->src[i] = ctx->frame.data[i];
769
+        if (ctx->interlaced && ctx->cur_field)
770
+            ctx->src[i] += ctx->frame.linesize[i];
771
+    }
772
+
773
+    dnxhd_write_header(avctx, buf);
774
+
775
+    if (avctx->mb_decision == FF_MB_DECISION_RD)
776
+        ret = dnxhd_encode_rdo(avctx, ctx);
777
+    else
778
+        ret = dnxhd_encode_variance(avctx, ctx);
779
+    if (ret < 0) {
780
+        av_log(avctx, AV_LOG_ERROR, "picture could not fit ratecontrol constraints\n");
781
+        return -1;
782
+    }
783
+
784
+    dnxhd_setup_threads_slices(ctx, buf);
785
+
786
+    offset = 0;
787
+    for (i = 0; i < ctx->m.mb_height; i++) {
788
+        AV_WB32(ctx->msip + i * 4, offset);
789
+        offset += ctx->slice_size[i];
790
+        assert(!(ctx->slice_size[i] & 3));
791
+    }
792
+
793
+    avctx->execute(avctx, dnxhd_encode_thread, (void**)&ctx->thread[0], NULL, avctx->thread_count);
794
+
795
+    AV_WB32(buf + ctx->cid_table->coding_unit_size - 4, 0x600DC0DE); // EOF
796
+
797
+    if (ctx->interlaced && first_field) {
798
+        first_field     = 0;
799
+        ctx->cur_field ^= 1;
800
+        buf      += ctx->cid_table->coding_unit_size;
801
+        buf_size -= ctx->cid_table->coding_unit_size;
802
+        goto encode_coding_unit;
803
+    }
804
+
805
+    return ctx->cid_table->frame_size;
806
+}
807
+
808
+static int dnxhd_encode_end(AVCodecContext *avctx)
809
+{
810
+    DNXHDEncContext *ctx = avctx->priv_data;
811
+    int i;
812
+
813
+    av_freep(&ctx->table_vlc_codes);
814
+    av_freep(&ctx->table_vlc_bits);
815
+    av_freep(&ctx->table_run_codes);
816
+    av_freep(&ctx->table_run_bits);
817
+
818
+    av_freep(&ctx->mb_bits);
819
+    av_freep(&ctx->mb_qscale);
820
+    av_freep(&ctx->mb_rc);
821
+    av_freep(&ctx->mb_cmp);
822
+    av_freep(&ctx->slice_size);
823
+
824
+    av_freep(&ctx->qmatrix_c);
825
+    av_freep(&ctx->qmatrix_l);
826
+    av_freep(&ctx->qmatrix_c16);
827
+    av_freep(&ctx->qmatrix_l16);
828
+
829
+    for (i = 1; i < avctx->thread_count; i++)
830
+        av_freep(&ctx->thread[i]);
831
+
832
+    return 0;
833
+}
834
+
835
+AVCodec dnxhd_encoder = { // codec descriptor for the DNxHD encoder
836
+    "dnxhd",
837
+    CODEC_TYPE_VIDEO,
838
+    CODEC_ID_DNXHD,
839
+    sizeof(DNXHDEncContext), // presumably the size of avctx->priv_data, which the callbacks use as a DNXHDEncContext
840
+    dnxhd_encode_init, // validates the parameters and allocates the encoder state
841
+    dnxhd_encode_picture, // compresses one frame (two coding units when interlaced)
842
+    dnxhd_encode_end, // frees everything allocated by dnxhd_encode_init
843
+    .pix_fmts = (enum PixelFormat[]){PIX_FMT_YUV422P, -1}, // only planar YUV 4:2:2 is listed; dnxhd_encode_init rejects any other format
844
+};
... ...
@@ -516,6 +516,51 @@ static int mov_write_avcc_tag(ByteIOContext *pb, MOVTrack *track)
516 516
     return updateSize(pb, pos);
517 517
 }
518 518
 
519
+/* also used by all avid codecs (dv, imx, meridien) and their variants */
520
+static int mov_write_avid_tag(ByteIOContext *pb, MOVTrack *track) /* Writes the ACLR, APRG and ARES atoms plus 4 trailing zero bytes to pb; always returns 0. */
521
+{
522
+    int i;
523
+    put_be32(pb, 24); /* size */ /* 6 x 32-bit fields: size, tag, tag, "0001", two values */
524
+    put_tag(pb, "ACLR");
525
+    put_tag(pb, "ACLR");
526
+    put_tag(pb, "0001");
527
+    put_be32(pb, 1); /* yuv 1 / rgb 2 ? */
528
+    put_be32(pb, 0); /* unknown */
529
+
530
+    put_be32(pb, 24); /* size */ /* same 6-field layout as ACLR */
531
+    put_tag(pb, "APRG");
532
+    put_tag(pb, "APRG");
533
+    put_tag(pb, "0001");
534
+    put_be32(pb, 1); /* unknown */
535
+    put_be32(pb, 0); /* unknown */
536
+
537
+    put_be32(pb, 120); /* size */ /* 10 x 32-bit fields (40 bytes) + 10 x 64-bit zero padding (80 bytes) */
538
+    put_tag(pb, "ARES");
539
+    put_tag(pb, "ARES");
540
+    put_tag(pb, "0001");
541
+    put_be32(pb, AV_RB32(track->vosData + 0x28)); /* dnxhd cid, some id ? */ /* big-endian 32-bit read at byte offset 0x28 of the saved frame header; NOTE(review): vosData is filled from the first DNxHD packet in mov_write_packet — confirm it cannot be unset here */
542
+    put_be32(pb, track->enc->width);
543
+    /* values below are based on samples created with quicktime and avid codecs */
544
+    if (track->vosData[5] & 2) { // interlaced
545
+        put_be32(pb, track->enc->height/2); /* half the frame height is written in the interlaced case */
546
+        put_be32(pb, 2); /* unknown */
547
+        put_be32(pb, 0); /* unknown */
548
+        put_be32(pb, 4); /* unknown */
549
+    } else {
550
+        put_be32(pb, track->enc->height);
551
+        put_be32(pb, 1); /* unknown */
552
+        put_be32(pb, 0); /* unknown */
553
+        put_be32(pb, 5); /* unknown */
554
+    }
555
+    /* padding */
556
+    for (i = 0; i < 10; i++)
557
+        put_be64(pb, 0);
558
+
559
+    /* extra padding for stsd needed */
560
+    put_be32(pb, 0); /* written after the 120-byte ARES atom, so not counted in its size */
561
+    return 0;
562
+}
563
+
519 564
 static int mov_find_video_codec_tag(AVFormatContext *s, MOVTrack *track)
520 565
 {
521 566
     int tag = track->enc->codec_tag;
... ...
@@ -623,6 +668,8 @@ static int mov_write_video_tag(ByteIOContext *pb, MOVTrack* track)
623 623
         mov_write_svq3_tag(pb);
624 624
     else if(track->enc->codec_id == CODEC_ID_H264)
625 625
         mov_write_avcc_tag(pb, track);
626
+    else if(track->enc->codec_id == CODEC_ID_DNXHD)
627
+        mov_write_avid_tag(pb, track);
626 628
 
627 629
     return updateSize (pb, pos);
628 630
 }
... ...
@@ -1561,6 +1608,13 @@ static int mov_write_packet(AVFormatContext *s, AVPacket *pkt)
1561 1561
         avc_parse_nal_units(&pkt->data, &pkt->size);
1562 1562
         assert(pkt->size);
1563 1563
         size = pkt->size;
1564
+    } else if (enc->codec_id == CODEC_ID_DNXHD && !trk->vosLen) {
1565
+        /* copy frame header to create needed atoms */
1566
+        if (size < 640)
1567
+            return -1;
1568
+        trk->vosLen = 640;
1569
+        trk->vosData = av_malloc(trk->vosLen);
1570
+        memcpy(trk->vosData, pkt->data, 640);
1564 1571
     }
1565 1572
 
1566 1573
     if (!(trk->entry % MOV_INDEX_CLUSTER_SIZE)) {