Browse code

prores encoder

Kostya Shishkov authored on 2012/02/03 04:54:53
Showing 8 changed files
... ...
@@ -7,6 +7,7 @@ version <next>:
7 7
 - Support for fragmentation in the mov/mp4 muxer
8 8
 - ISMV (Smooth Streaming) muxer
9 9
 - CDXL demuxer and decoder
10
+- Apple ProRes encoder
10 11
 
11 12
 
12 13
 version 0.8:
... ...
@@ -406,7 +406,7 @@ following image formats are supported:
406 406
     @tab Used in Chinese MP3 players.
407 407
 @item ANSI/ASCII art         @tab     @tab  X
408 408
 @item Apple MJPEG-B          @tab     @tab  X
409
-@item Apple ProRes           @tab     @tab  X
409
+@item Apple ProRes           @tab  X  @tab  X
410 410
 @item Apple QuickDraw        @tab     @tab  X
411 411
     @tab fourcc: qdrw
412 412
 @item Asus v1                @tab  X  @tab  X
... ...
@@ -302,6 +302,7 @@ OBJS-$(CONFIG_PNG_ENCODER)             += png.o pngenc.o
302 302
 OBJS-$(CONFIG_PPM_DECODER)             += pnmdec.o pnm.o
303 303
 OBJS-$(CONFIG_PPM_ENCODER)             += pnmenc.o pnm.o
304 304
 OBJS-$(CONFIG_PRORES_DECODER)          += proresdec.o proresdata.o proresdsp.o
305
+OBJS-$(CONFIG_PRORES_ENCODER)          += proresenc.o proresdata.o proresdsp.o
305 306
 OBJS-$(CONFIG_PTX_DECODER)             += ptx.o
306 307
 OBJS-$(CONFIG_QCELP_DECODER)           += qcelpdec.o celp_math.o         \
307 308
                                           celp_filters.o acelp_vectors.o \
... ...
@@ -168,7 +168,7 @@ void avcodec_register_all(void)
168 168
     REGISTER_DECODER (PICTOR, pictor);
169 169
     REGISTER_ENCDEC  (PNG, png);
170 170
     REGISTER_ENCDEC  (PPM, ppm);
171
-    REGISTER_DECODER (PRORES, prores);
171
+    REGISTER_ENCDEC  (PRORES, prores);
172 172
     REGISTER_DECODER (PTX, ptx);
173 173
     REGISTER_DECODER (QDRAW, qdraw);
174 174
     REGISTER_DECODER (QPEG, qpeg);
... ...
@@ -51,13 +51,30 @@ static void prores_idct_put_c(uint16_t *out, int linesize, DCTELEM *block, const
51 51
     put_pixels(out, linesize >> 1, block);
52 52
 }
53 53
 
54
+static void prores_fdct_c(const uint16_t *src, int linesize, DCTELEM *block)
55
+{
56
+    int x, y;
57
+    const uint16_t *tsrc = src;
58
+
59
+    for (y = 0; y < 8; y++) {
60
+        for (x = 0; x < 8; x++)
61
+            block[y * 8 + x] = tsrc[x];
62
+        tsrc += linesize >> 1;
63
+    }
64
+    ff_jpeg_fdct_islow_10(block);
65
+}
66
+
54 67
 void ff_proresdsp_init(ProresDSPContext *dsp)
55 68
 {
56 69
     dsp->idct_put = prores_idct_put_c;
57 70
     dsp->idct_permutation_type = FF_NO_IDCT_PERM;
71
+    dsp->fdct     = prores_fdct_c;
72
+    dsp->dct_permutation_type  = FF_NO_IDCT_PERM;
58 73
 
59 74
     if (HAVE_MMX) ff_proresdsp_x86_init(dsp);
60 75
 
61 76
     ff_init_scantable_permutation(dsp->idct_permutation,
62 77
                                   dsp->idct_permutation_type);
78
+    ff_init_scantable_permutation(dsp->dct_permutation,
79
+                                  dsp->dct_permutation_type);
63 80
 }
... ...
@@ -30,7 +30,10 @@
30 30
 typedef struct {
31 31
     int idct_permutation_type;
32 32
     uint8_t idct_permutation[64];
33
+    int dct_permutation_type;
34
+    uint8_t dct_permutation[64];
33 35
     void (* idct_put) (uint16_t *out, int linesize, DCTELEM *block, const int16_t *qmat);
36
+    void (* fdct) (const uint16_t *src, int linesize, DCTELEM *block);
34 37
 } ProresDSPContext;
35 38
 
36 39
 void ff_proresdsp_init(ProresDSPContext *dsp);
37 40
new file mode 100644
... ...
@@ -0,0 +1,836 @@
0
+/*
1
+ * Apple ProRes encoder
2
+ *
3
+ * Copyright (c) 2012 Konstantin Shishkov
4
+ *
5
+ * This file is part of Libav.
6
+ *
7
+ * Libav is free software; you can redistribute it and/or
8
+ * modify it under the terms of the GNU Lesser General Public
9
+ * License as published by the Free Software Foundation; either
10
+ * version 2.1 of the License, or (at your option) any later version.
11
+ *
12
+ * Libav is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
+ * Lesser General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU Lesser General Public
18
+ * License along with Libav; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ */
21
+
22
+#include "libavutil/opt.h"
23
+#include "avcodec.h"
24
+#include "put_bits.h"
25
+#include "bytestream.h"
26
+#include "internal.h"
27
+#include "proresdsp.h"
28
+#include "proresdata.h"
29
+
30
+#define CFACTOR_Y422 2
31
+#define CFACTOR_Y444 3
32
+
33
+#define MAX_MBS_PER_SLICE 8
34
+
35
+#define MAX_PLANES 3 // should be increased to 4 when there's PIX_FMT_YUV444AP10
36
+
37
+enum {
38
+    PRORES_PROFILE_PROXY = 0,
39
+    PRORES_PROFILE_LT,
40
+    PRORES_PROFILE_STANDARD,
41
+    PRORES_PROFILE_HQ,
42
+};
43
+
44
+#define NUM_MB_LIMITS 4
45
+static const int prores_mb_limits[NUM_MB_LIMITS] = {
46
+    1620, // up to 720x576
47
+    2700, // up to 960x720
48
+    6075, // up to 1440x1080
49
+    9216, // up to 2048x1152
50
+};
51
+
52
+static const struct prores_profile {
53
+    const char *full_name;
54
+    uint32_t    tag;
55
+    int         min_quant;
56
+    int         max_quant;
57
+    int         br_tab[NUM_MB_LIMITS];
58
+    uint8_t     quant[64];
59
+} prores_profile_info[4] = {
60
+    {
61
+        .full_name = "proxy",
62
+        .tag       = MKTAG('a', 'p', 'c', 'o'),
63
+        .min_quant = 4,
64
+        .max_quant = 8,
65
+        .br_tab    = { 300, 242, 220, 194 },
66
+        .quant     = {
67
+             4,  7,  9, 11, 13, 14, 15, 63,
68
+             7,  7, 11, 12, 14, 15, 63, 63,
69
+             9, 11, 13, 14, 15, 63, 63, 63,
70
+            11, 11, 13, 14, 63, 63, 63, 63,
71
+            11, 13, 14, 63, 63, 63, 63, 63,
72
+            13, 14, 63, 63, 63, 63, 63, 63,
73
+            13, 63, 63, 63, 63, 63, 63, 63,
74
+            63, 63, 63, 63, 63, 63, 63, 63,
75
+        },
76
+    },
77
+    {
78
+        .full_name = "LT",
79
+        .tag       = MKTAG('a', 'p', 'c', 's'),
80
+        .min_quant = 1,
81
+        .max_quant = 9,
82
+        .br_tab    = { 720, 560, 490, 440 },
83
+        .quant     = {
84
+             4,  5,  6,  7,  9, 11, 13, 15,
85
+             5,  5,  7,  8, 11, 13, 15, 17,
86
+             6,  7,  9, 11, 13, 15, 15, 17,
87
+             7,  7,  9, 11, 13, 15, 17, 19,
88
+             7,  9, 11, 13, 14, 16, 19, 23,
89
+             9, 11, 13, 14, 16, 19, 23, 29,
90
+             9, 11, 13, 15, 17, 21, 28, 35,
91
+            11, 13, 16, 17, 21, 28, 35, 41,
92
+        },
93
+    },
94
+    {
95
+        .full_name = "standard",
96
+        .tag       = MKTAG('a', 'p', 'c', 'n'),
97
+        .min_quant = 1,
98
+        .max_quant = 6,
99
+        .br_tab    = { 1050, 808, 710, 632 },
100
+        .quant     = {
101
+             4,  4,  5,  5,  6,  7,  7,  9,
102
+             4,  4,  5,  6,  7,  7,  9,  9,
103
+             5,  5,  6,  7,  7,  9,  9, 10,
104
+             5,  5,  6,  7,  7,  9,  9, 10,
105
+             5,  6,  7,  7,  8,  9, 10, 12,
106
+             6,  7,  7,  8,  9, 10, 12, 15,
107
+             6,  7,  7,  9, 10, 11, 14, 17,
108
+             7,  7,  9, 10, 11, 14, 17, 21,
109
+        },
110
+    },
111
+    {
112
+        .full_name = "high quality",
113
+        .tag       = MKTAG('a', 'p', 'c', 'h'),
114
+        .min_quant = 1,
115
+        .max_quant = 6,
116
+        .br_tab    = { 1566, 1216, 1070, 950 },
117
+        .quant     = {
118
+             4,  4,  4,  4,  4,  4,  4,  4,
119
+             4,  4,  4,  4,  4,  4,  4,  4,
120
+             4,  4,  4,  4,  4,  4,  4,  4,
121
+             4,  4,  4,  4,  4,  4,  4,  5,
122
+             4,  4,  4,  4,  4,  4,  5,  5,
123
+             4,  4,  4,  4,  4,  5,  5,  6,
124
+             4,  4,  4,  4,  5,  5,  6,  7,
125
+             4,  4,  4,  4,  5,  6,  7,  7,
126
+        },
127
+    }
128
+// for 4444 profile bitrate numbers are { 2350, 1828, 1600, 1425 }
129
+};
130
+
131
+#define TRELLIS_WIDTH 16
132
+#define SCORE_LIMIT   INT_MAX / 2
133
+
134
+struct TrellisNode {
135
+    int prev_node;
136
+    int quant;
137
+    int bits;
138
+    int score;
139
+};
140
+
141
+typedef struct ProresContext {
142
+    AVClass *class;
143
+    DECLARE_ALIGNED(16, DCTELEM, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
144
+    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
145
+    int16_t quants[16][64];
146
+
147
+    ProresDSPContext dsp;
148
+    ScanTable  scantable;
149
+
150
+    int mb_width, mb_height;
151
+    int mbs_per_slice;
152
+    int num_chroma_blocks, chroma_factor;
153
+    int slices_width;
154
+    int num_slices;
155
+    int num_planes;
156
+    int bits_per_mb;
157
+
158
+    int profile;
159
+    const struct prores_profile *profile_info;
160
+
161
+    struct TrellisNode *nodes;
162
+    int *slice_q;
163
+} ProresContext;
164
+
165
+static void get_slice_data(ProresContext *ctx, const uint16_t *src,
166
+                           int linesize, int x, int y, int w, int h,
167
+                           DCTELEM *blocks,
168
+                           int mbs_per_slice, int blocks_per_mb)
169
+{
170
+    const uint16_t *esrc;
171
+    const int mb_width = 4 * blocks_per_mb;
172
+    int elinesize;
173
+    int i, j, k;
174
+
175
+    for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
176
+        if (x >= w) {
177
+            memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
178
+                              * sizeof(*blocks));
179
+            return;
180
+        }
181
+        if (x + mb_width <= w && y + 16 <= h) {
182
+            esrc      = src;
183
+            elinesize = linesize;
184
+        } else {
185
+            int bw, bh, pix;
186
+            const int estride = 16 / sizeof(*ctx->emu_buf);
187
+
188
+            esrc      = ctx->emu_buf;
189
+            elinesize = 16;
190
+
191
+            bw = FFMIN(w - x, mb_width);
192
+            bh = FFMIN(h - y, 16);
193
+
194
+            for (j = 0; j < bh; j++) {
195
+                memcpy(ctx->emu_buf + j * estride, src + j * linesize,
196
+                       bw * sizeof(*src));
197
+                pix = ctx->emu_buf[j * estride + bw - 1];
198
+                for (k = bw; k < mb_width; k++)
199
+                    ctx->emu_buf[j * estride + k] = pix;
200
+            }
201
+            for (; j < 16; j++)
202
+                memcpy(ctx->emu_buf + j * estride,
203
+                       ctx->emu_buf + (bh - 1) * estride,
204
+                       mb_width * sizeof(*ctx->emu_buf));
205
+        }
206
+        ctx->dsp.fdct(esrc, elinesize, blocks);
207
+        blocks += 64;
208
+        if (blocks_per_mb > 2) {
209
+            ctx->dsp.fdct(src + 8, linesize, blocks);
210
+            blocks += 64;
211
+        }
212
+        ctx->dsp.fdct(src + linesize * 4, linesize, blocks);
213
+        blocks += 64;
214
+        if (blocks_per_mb > 2) {
215
+            ctx->dsp.fdct(src + linesize * 4 + 8, linesize, blocks);
216
+            blocks += 64;
217
+        }
218
+
219
+        x += mb_width;
220
+    }
221
+}
222
+
223
+/**
224
+ * Write an unsigned rice/exp golomb codeword.
225
+ */
226
+static inline void encode_vlc_codeword(PutBitContext *pb, uint8_t codebook, int val)
227
+{
228
+    unsigned int rice_order, exp_order, switch_bits, switch_val;
229
+    int exponent;
230
+
231
+    /* number of prefix bits to switch between Rice and expGolomb */
232
+    switch_bits = (codebook & 3) + 1;
233
+    rice_order  =  codebook >> 5;       /* rice code order */
234
+    exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
235
+
236
+    switch_val  = switch_bits << rice_order;
237
+
238
+    if (val >= switch_val) {
239
+        val -= switch_val - (1 << exp_order);
240
+        exponent = av_log2(val);
241
+
242
+        put_bits(pb, exponent - exp_order + switch_bits, 0);
243
+        put_bits(pb, 1, 1);
244
+        put_bits(pb, exponent, val);
245
+    } else {
246
+        exponent = val >> rice_order;
247
+
248
+        if (exponent)
249
+            put_bits(pb, exponent, 0);
250
+        put_bits(pb, 1, 1);
251
+        if (rice_order)
252
+            put_sbits(pb, rice_order, val);
253
+    }
254
+}
255
+
256
+#define GET_SIGN(x)  ((x) >> 31)
257
+#define MAKE_CODE(x) (((x) << 1) ^ GET_SIGN(x))
258
+
259
+static void encode_dcs(PutBitContext *pb, DCTELEM *blocks,
260
+                       int blocks_per_slice, int scale)
261
+{
262
+    int i;
263
+    int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
264
+
265
+    prev_dc = (blocks[0] - 0x4000) / scale;
266
+    encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
267
+    codebook = 3;
268
+    blocks  += 64;
269
+
270
+    for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
271
+        dc       = (blocks[0] - 0x4000) / scale;
272
+        delta    = dc - prev_dc;
273
+        new_sign = GET_SIGN(delta);
274
+        delta    = (delta ^ sign) - sign;
275
+        code     = MAKE_CODE(delta);
276
+        encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
277
+        codebook = (code + (code & 1)) >> 1;
278
+        codebook = FFMIN(codebook, 3);
279
+        sign     = new_sign;
280
+        prev_dc  = dc;
281
+    }
282
+}
283
+
284
+static void encode_acs(PutBitContext *pb, DCTELEM *blocks,
285
+                       int blocks_per_slice,
286
+                       int plane_size_factor,
287
+                       const uint8_t *scan, const int16_t *qmat)
288
+{
289
+    int idx, i;
290
+    int run, level, run_cb, lev_cb;
291
+    int max_coeffs, abs_level;
292
+
293
+    max_coeffs = blocks_per_slice << 6;
294
+    run_cb     = ff_prores_run_to_cb_index[4];
295
+    lev_cb     = ff_prores_lev_to_cb_index[2];
296
+    run        = 0;
297
+
298
+    for (i = 1; i < 64; i++) {
299
+        for (idx = scan[i]; idx < max_coeffs; idx += 64) {
300
+            level = blocks[idx] / qmat[scan[i]];
301
+            if (level) {
302
+                abs_level = FFABS(level);
303
+                encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
304
+                encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
305
+                                    abs_level - 1);
306
+                put_sbits(pb, 1, GET_SIGN(level));
307
+
308
+                run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
309
+                lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
310
+                run    = 0;
311
+            } else {
312
+                run++;
313
+            }
314
+        }
315
+    }
316
+}
317
+
318
+static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
319
+                              const uint16_t *src, int linesize,
320
+                              int mbs_per_slice, DCTELEM *blocks,
321
+                              int blocks_per_mb, int plane_size_factor,
322
+                              const int16_t *qmat)
323
+{
324
+    int blocks_per_slice, saved_pos;
325
+
326
+    saved_pos = put_bits_count(pb);
327
+    blocks_per_slice = mbs_per_slice * blocks_per_mb;
328
+
329
+    encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
330
+    encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
331
+               ctx->scantable.permutated, qmat);
332
+    flush_put_bits(pb);
333
+
334
+    return (put_bits_count(pb) - saved_pos) >> 3;
335
+}
336
+
337
+static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
338
+                        PutBitContext *pb,
339
+                        int sizes[4], int x, int y, int quant,
340
+                        int mbs_per_slice)
341
+{
342
+    ProresContext *ctx = avctx->priv_data;
343
+    int i, xp, yp;
344
+    int total_size = 0;
345
+    const uint16_t *src;
346
+    int slice_width_factor = av_log2(mbs_per_slice);
347
+    int num_cblocks, pwidth;
348
+    int plane_factor, is_chroma;
349
+
350
+    for (i = 0; i < ctx->num_planes; i++) {
351
+        is_chroma    = (i == 1 || i == 2);
352
+        plane_factor = slice_width_factor + 2;
353
+        if (is_chroma)
354
+            plane_factor += ctx->chroma_factor - 3;
355
+        if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
356
+            xp          = x << 4;
357
+            yp          = y << 4;
358
+            num_cblocks = 4;
359
+            pwidth      = avctx->width;
360
+        } else {
361
+            xp          = x << 3;
362
+            yp          = y << 4;
363
+            num_cblocks = 2;
364
+            pwidth      = avctx->width >> 1;
365
+        }
366
+        src = (const uint16_t*)(pic->data[i] + yp * pic->linesize[i]) + xp;
367
+
368
+        get_slice_data(ctx, src, pic->linesize[i], xp, yp,
369
+                       pwidth, avctx->height, ctx->blocks[0],
370
+                       mbs_per_slice, num_cblocks);
371
+        sizes[i] = encode_slice_plane(ctx, pb, src, pic->linesize[i],
372
+                                      mbs_per_slice, ctx->blocks[0],
373
+                                      num_cblocks, plane_factor,
374
+                                      ctx->quants[quant]);
375
+        total_size += sizes[i];
376
+    }
377
+    return total_size;
378
+}
379
+
380
/**
 * Compute the length in bits of a rice/exp golomb codeword without
 * writing it.
 *
 * The codebook byte is unpacked the same way as when writing: bits 0-1
 * give the switch prefix length minus one, bits 2-4 the exp-Golomb order
 * and bits 5-7 the Rice order.
 *
 * @param codebook packed codebook descriptor
 * @param val      value that would be encoded
 * @return codeword length in bits
 */
static inline int estimate_vlc(uint8_t codebook, int val)
{
    const unsigned int switch_bits = (codebook & 3) + 1;
    const unsigned int rice_order  =  codebook >> 5;
    const unsigned int exp_order   = (codebook >> 2) & 7;
    const unsigned int switch_val  = switch_bits << rice_order;
    int exponent;

    if (val < switch_val) {
        /* Rice: unary prefix, terminating bit and rice_order suffix bits */
        return (val >> rice_order) + rice_order + 1;
    }

    /* exp-Golomb: zero prefix, terminating bit and exponent suffix bits */
    val -= switch_val - (1 << exp_order);
    exponent = av_log2(val);

    return exponent * 2 - exp_order + switch_bits + 1;
}
401
+
402
+static int estimate_dcs(int *error, DCTELEM *blocks, int blocks_per_slice,
403
+                        int scale)
404
+{
405
+    int i;
406
+    int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
407
+    int bits;
408
+
409
+    prev_dc  = (blocks[0] - 0x4000) / scale;
410
+    bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
411
+    codebook = 3;
412
+    blocks  += 64;
413
+    *error  += FFABS(blocks[0] - 0x4000) % scale;
414
+
415
+    for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
416
+        dc       = (blocks[0] - 0x4000) / scale;
417
+        *error  += FFABS(blocks[0] - 0x4000) % scale;
418
+        delta    = dc - prev_dc;
419
+        new_sign = GET_SIGN(delta);
420
+        delta    = (delta ^ sign) - sign;
421
+        code     = MAKE_CODE(delta);
422
+        bits    += estimate_vlc(ff_prores_dc_codebook[codebook], code);
423
+        codebook = (code + (code & 1)) >> 1;
424
+        codebook = FFMIN(codebook, 3);
425
+        sign     = new_sign;
426
+        prev_dc  = dc;
427
+    }
428
+
429
+    return bits;
430
+}
431
+
432
+static int estimate_acs(int *error, DCTELEM *blocks, int blocks_per_slice,
433
+                        int plane_size_factor,
434
+                        const uint8_t *scan, const int16_t *qmat)
435
+{
436
+    int idx, i;
437
+    int run, level, run_cb, lev_cb;
438
+    int max_coeffs, abs_level;
439
+    int bits = 0;
440
+
441
+    max_coeffs = blocks_per_slice << 6;
442
+    run_cb     = ff_prores_run_to_cb_index[4];
443
+    lev_cb     = ff_prores_lev_to_cb_index[2];
444
+    run        = 0;
445
+
446
+    for (i = 1; i < 64; i++) {
447
+        for (idx = scan[i]; idx < max_coeffs; idx += 64) {
448
+            level   = blocks[idx] / qmat[scan[i]];
449
+            *error += FFABS(blocks[idx]) % qmat[scan[i]];
450
+            if (level) {
451
+                abs_level = FFABS(level);
452
+                bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
453
+                bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
454
+                                     abs_level - 1) + 1;
455
+
456
+                run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
457
+                lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
458
+                run    = 0;
459
+            } else {
460
+                run++;
461
+            }
462
+        }
463
+    }
464
+
465
+    return bits;
466
+}
467
+
468
+static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
469
+                                const uint16_t *src, int linesize,
470
+                                int mbs_per_slice,
471
+                                int blocks_per_mb, int plane_size_factor,
472
+                                const int16_t *qmat)
473
+{
474
+    int blocks_per_slice;
475
+    int bits;
476
+
477
+    blocks_per_slice = mbs_per_slice * blocks_per_mb;
478
+
479
+    bits  = estimate_dcs(error, ctx->blocks[plane], blocks_per_slice, qmat[0]);
480
+    bits += estimate_acs(error, ctx->blocks[plane], blocks_per_slice,
481
+                         plane_size_factor, ctx->scantable.permutated, qmat);
482
+
483
+    return FFALIGN(bits, 8);
484
+}
485
+
486
+static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
487
+                            int trellis_node, int x, int y, int mbs_per_slice)
488
+{
489
+    ProresContext *ctx = avctx->priv_data;
490
+    int i, q, pq, xp, yp;
491
+    const uint16_t *src;
492
+    int slice_width_factor = av_log2(mbs_per_slice);
493
+    int num_cblocks[MAX_PLANES], pwidth;
494
+    int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
495
+    const int min_quant = ctx->profile_info->min_quant;
496
+    const int max_quant = ctx->profile_info->max_quant;
497
+    int error, bits, bits_limit;
498
+    int mbs, prev, cur, new_score;
499
+    int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
500
+
501
+    mbs = x + mbs_per_slice;
502
+
503
+    for (i = 0; i < ctx->num_planes; i++) {
504
+        is_chroma[i]    = (i == 1 || i == 2);
505
+        plane_factor[i] = slice_width_factor + 2;
506
+        if (is_chroma[i])
507
+            plane_factor[i] += ctx->chroma_factor - 3;
508
+        if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
509
+            xp             = x << 4;
510
+            yp             = y << 4;
511
+            num_cblocks[i] = 4;
512
+            pwidth         = avctx->width;
513
+        } else {
514
+            xp             = x << 3;
515
+            yp             = y << 4;
516
+            num_cblocks[i] = 2;
517
+            pwidth         = avctx->width >> 1;
518
+        }
519
+        src = (const uint16_t*)(pic->data[i] + yp * pic->linesize[i]) + xp;
520
+
521
+        get_slice_data(ctx, src, pic->linesize[i], xp, yp,
522
+                       pwidth, avctx->height, ctx->blocks[i],
523
+                       mbs_per_slice, num_cblocks[i]);
524
+    }
525
+
526
+    for (q = min_quant; q <= max_quant; q++) {
527
+        ctx->nodes[trellis_node + q].prev_node = -1;
528
+        ctx->nodes[trellis_node + q].quant     = q;
529
+    }
530
+
531
+    // todo: maybe perform coarser quantising to fit into frame size when needed
532
+    for (q = min_quant; q <= max_quant; q++) {
533
+        bits  = 0;
534
+        error = 0;
535
+        for (i = 0; i < ctx->num_planes; i++) {
536
+            bits += estimate_slice_plane(ctx, &error, i,
537
+                                         src, pic->linesize[i],
538
+                                         mbs_per_slice,
539
+                                         num_cblocks[i], plane_factor[i],
540
+                                         ctx->quants[q]);
541
+        }
542
+        if (bits > 65000 * 8) {
543
+            error = SCORE_LIMIT;
544
+            break;
545
+        }
546
+        slice_bits[q]  = bits;
547
+        slice_score[q] = error;
548
+    }
549
+
550
+    bits_limit = mbs * ctx->bits_per_mb;
551
+    for (pq = min_quant; pq <= max_quant; pq++) {
552
+        prev = trellis_node - TRELLIS_WIDTH + pq;
553
+
554
+        for (q = min_quant; q <= max_quant; q++) {
555
+            cur = trellis_node + q;
556
+
557
+            bits  = ctx->nodes[prev].bits + slice_bits[q];
558
+            error = slice_score[q];
559
+            if (bits > bits_limit)
560
+                error = SCORE_LIMIT;
561
+
562
+            if (ctx->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
563
+                new_score = ctx->nodes[prev].score + error;
564
+            else
565
+                new_score = SCORE_LIMIT;
566
+            if (ctx->nodes[cur].prev_node == -1 ||
567
+                ctx->nodes[cur].score >= new_score) {
568
+
569
+                ctx->nodes[cur].bits      = bits;
570
+                ctx->nodes[cur].score     = new_score;
571
+                ctx->nodes[cur].prev_node = prev;
572
+            }
573
+        }
574
+    }
575
+
576
+    error = ctx->nodes[trellis_node + min_quant].score;
577
+    pq    = trellis_node + min_quant;
578
+    for (q = min_quant + 1; q <= max_quant; q++) {
579
+        if (ctx->nodes[trellis_node + q].score <= error) {
580
+            error = ctx->nodes[trellis_node + q].score;
581
+            pq    = trellis_node + q;
582
+        }
583
+    }
584
+
585
+    return pq;
586
+}
587
+
588
+static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
589
+                        const AVFrame *pic, int *got_packet)
590
+{
591
+    ProresContext *ctx = avctx->priv_data;
592
+    uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
593
+    uint8_t *picture_size_pos;
594
+    PutBitContext pb;
595
+    int x, y, i, mb, q = 0;
596
+    int sizes[4] = { 0 };
597
+    int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
598
+    int frame_size, picture_size, slice_size;
599
+    int mbs_per_slice = ctx->mbs_per_slice;
600
+    int pkt_size, ret;
601
+
602
+    *avctx->coded_frame           = *pic;
603
+    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
604
+    avctx->coded_frame->key_frame = 1;
605
+
606
+    pkt_size = ctx->mb_width * ctx->mb_height * 64 * 3 * 12
607
+               + ctx->num_slices * 2 + 200 + FF_MIN_BUFFER_SIZE;
608
+
609
+    if ((ret = ff_alloc_packet(pkt, pkt_size)) < 0) {
610
+        av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
611
+        return ret;
612
+    }
613
+
614
+    orig_buf = pkt->data;
615
+
616
+    // frame atom
617
+    orig_buf += 4;                              // frame size
618
+    bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
619
+    buf = orig_buf;
620
+
621
+    // frame header
622
+    tmp = buf;
623
+    buf += 2;                                   // frame header size will be stored here
624
+    bytestream_put_be16  (&buf, 0);             // version 1
625
+    bytestream_put_buffer(&buf, "Lavc", 4);     // creator
626
+    bytestream_put_be16  (&buf, avctx->width);
627
+    bytestream_put_be16  (&buf, avctx->height);
628
+    bytestream_put_byte  (&buf, ctx->chroma_factor << 6); // frame flags
629
+    bytestream_put_byte  (&buf, 0);             // reserved
630
+    bytestream_put_byte  (&buf, 0);             // primaries
631
+    bytestream_put_byte  (&buf, 0);             // transfer function
632
+    bytestream_put_byte  (&buf, 6);             // colour matrix - ITU-R BT.601-4
633
+    bytestream_put_byte  (&buf, 0x40);          // source format and alpha information
634
+    bytestream_put_byte  (&buf, 0);             // reserved
635
+    bytestream_put_byte  (&buf, 0x03);          // matrix flags - both matrices are present
636
+    // luma quantisation matrix
637
+    for (i = 0; i < 64; i++)
638
+        bytestream_put_byte(&buf, ctx->profile_info->quant[i]);
639
+    // chroma quantisation matrix
640
+    for (i = 0; i < 64; i++)
641
+        bytestream_put_byte(&buf, ctx->profile_info->quant[i]);
642
+    bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size
643
+
644
+    // picture header
645
+    picture_size_pos = buf + 1;
646
+    bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
647
+    buf += 4;                                   // picture data size will be stored here
648
+    bytestream_put_be16  (&buf, ctx->num_slices); // total number of slices
649
+    bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
650
+
651
+    // seek table - will be filled during slice encoding
652
+    slice_sizes = buf;
653
+    buf += ctx->num_slices * 2;
654
+
655
+    // slices
656
+    for (y = 0; y < ctx->mb_height; y++) {
657
+        mbs_per_slice = ctx->mbs_per_slice;
658
+        for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
659
+            while (ctx->mb_width - x < mbs_per_slice)
660
+                mbs_per_slice >>= 1;
661
+            q = find_slice_quant(avctx, pic, (mb + 1) * TRELLIS_WIDTH, x, y,
662
+                                 mbs_per_slice);
663
+        }
664
+
665
+        for (x = ctx->slices_width - 1; x >= 0; x--) {
666
+            ctx->slice_q[x] = ctx->nodes[q].quant;
667
+            q = ctx->nodes[q].prev_node;
668
+        }
669
+
670
+        mbs_per_slice = ctx->mbs_per_slice;
671
+        for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
672
+            q = ctx->slice_q[mb];
673
+
674
+            while (ctx->mb_width - x < mbs_per_slice)
675
+                mbs_per_slice >>= 1;
676
+
677
+            bytestream_put_byte(&buf, slice_hdr_size << 3);
678
+            slice_hdr = buf;
679
+            buf += slice_hdr_size - 1;
680
+            init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
681
+            encode_slice(avctx, pic, &pb, sizes, x, y, q, mbs_per_slice);
682
+
683
+            bytestream_put_byte(&slice_hdr, q);
684
+            slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
685
+            for (i = 0; i < ctx->num_planes - 1; i++) {
686
+                bytestream_put_be16(&slice_hdr, sizes[i]);
687
+                slice_size += sizes[i];
688
+            }
689
+            bytestream_put_be16(&slice_sizes, slice_size);
690
+            buf += slice_size - slice_hdr_size;
691
+        }
692
+    }
693
+
694
+    orig_buf -= 8;
695
+    frame_size = buf - orig_buf;
696
+    picture_size = buf - picture_size_pos - 6;
697
+    bytestream_put_be32(&orig_buf, frame_size);
698
+    bytestream_put_be32(&picture_size_pos, picture_size);
699
+
700
+    pkt->size   = frame_size;
701
+    pkt->flags |= AV_PKT_FLAG_KEY;
702
+    *got_packet = 1;
703
+
704
+    return 0;
705
+}
706
+
707
+static av_cold int encode_close(AVCodecContext *avctx)
708
+{
709
+    ProresContext *ctx = avctx->priv_data;
710
+
711
+    if (avctx->coded_frame->data[0])
712
+        avctx->release_buffer(avctx, avctx->coded_frame);
713
+
714
+    av_freep(&avctx->coded_frame);
715
+
716
+    av_freep(&ctx->nodes);
717
+    av_freep(&ctx->slice_q);
718
+
719
+    return 0;
720
+}
721
+
722
+static av_cold int encode_init(AVCodecContext *avctx)
723
+{
724
+    ProresContext *ctx = avctx->priv_data;
725
+    int mps;
726
+    int i, j;
727
+    int min_quant, max_quant;
728
+
729
+    avctx->bits_per_raw_sample = 10;
730
+    avctx->coded_frame = avcodec_alloc_frame();
731
+    if (!avctx->coded_frame)
732
+        return AVERROR(ENOMEM);
733
+
734
+    ff_proresdsp_init(&ctx->dsp);
735
+    ff_init_scantable(ctx->dsp.dct_permutation, &ctx->scantable,
736
+                      ff_prores_progressive_scan);
737
+
738
+    mps = ctx->mbs_per_slice;
739
+    if (mps & (mps - 1)) {
740
+        av_log(avctx, AV_LOG_ERROR,
741
+               "there should be an integer power of two MBs per slice\n");
742
+        return AVERROR(EINVAL);
743
+    }
744
+
745
+    ctx->chroma_factor = avctx->pix_fmt == PIX_FMT_YUV422P10
746
+                         ? CFACTOR_Y422
747
+                         : CFACTOR_Y444;
748
+    ctx->profile_info  = prores_profile_info + ctx->profile;
749
+    ctx->num_planes    = 3;
750
+
751
+    ctx->mb_width      = FFALIGN(avctx->width,  16) >> 4;
752
+    ctx->mb_height     = FFALIGN(avctx->height, 16) >> 4;
753
+    ctx->slices_width  = ctx->mb_width / mps;
754
+    ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
755
+    ctx->num_slices    = ctx->mb_height * ctx->slices_width;
756
+
757
+    for (i = 0; i < NUM_MB_LIMITS - 1; i++)
758
+        if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height)
759
+            break;
760
+    ctx->bits_per_mb   = ctx->profile_info->br_tab[i];
761
+
762
+    min_quant = ctx->profile_info->min_quant;
763
+    max_quant = ctx->profile_info->max_quant;
764
+    for (i = min_quant; i <= max_quant; i++) {
765
+        for (j = 0; j < 64; j++)
766
+            ctx->quants[i][j] = ctx->profile_info->quant[j] * i;
767
+    }
768
+
769
+    avctx->codec_tag   = ctx->profile_info->tag;
770
+
771
+    av_log(avctx, AV_LOG_DEBUG, "profile %d, %d slices, %d bits per MB\n",
772
+           ctx->profile, ctx->num_slices, ctx->bits_per_mb);
773
+
774
+    ctx->nodes = av_malloc((ctx->slices_width + 1) * TRELLIS_WIDTH
775
+                           * sizeof(*ctx->nodes));
776
+    if (!ctx->nodes) {
777
+        encode_close(avctx);
778
+        return AVERROR(ENOMEM);
779
+    }
780
+    for (i = min_quant; i <= max_quant; i++) {
781
+        ctx->nodes[i].prev_node = -1;
782
+        ctx->nodes[i].bits      = 0;
783
+        ctx->nodes[i].score     = 0;
784
+    }
785
+
786
+    ctx->slice_q = av_malloc(ctx->slices_width * sizeof(*ctx->slice_q));
787
+    if (!ctx->slice_q) {
788
+        encode_close(avctx);
789
+        return AVERROR(ENOMEM);
790
+    }
791
+
792
+    return 0;
793
+}
794
+
795
+#define OFFSET(x) offsetof(ProresContext, x)
796
+#define VE     AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
797
+
798
+static const AVOption options[] = {
799
+    { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
800
+        AV_OPT_TYPE_INT, { 8 }, 1, MAX_MBS_PER_SLICE, VE },
801
+    { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
802
+        { PRORES_PROFILE_STANDARD },
803
+        PRORES_PROFILE_PROXY, PRORES_PROFILE_HQ, VE, "profile" },
804
+    { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_PROXY },
805
+        0, 0, VE, "profile" },
806
+    { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_LT },
807
+        0, 0, VE, "profile" },
808
+    { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_STANDARD },
809
+        0, 0, VE, "profile" },
810
+    { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { PRORES_PROFILE_HQ },
811
+        0, 0, VE, "profile" },
812
+    { NULL }
813
+};
814
+
815
+static const AVClass proresenc_class = {
816
+    .class_name = "ProRes encoder",
817
+    .item_name  = av_default_item_name,
818
+    .option     = options,
819
+    .version    = LIBAVUTIL_VERSION_INT,
820
+};
821
+
822
+AVCodec ff_prores_encoder = {
823
+    .name           = "prores",
824
+    .type           = AVMEDIA_TYPE_VIDEO,
825
+    .id             = CODEC_ID_PRORES,
826
+    .priv_data_size = sizeof(ProresContext),
827
+    .init           = encode_init,
828
+    .close          = encode_close,
829
+    .encode2        = encode_frame,
830
+    .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
831
+    .pix_fmts       = (const enum PixelFormat[]) {
832
+                          PIX_FMT_YUV422P10, PIX_FMT_YUV444P10, PIX_FMT_NONE
833
+                      },
834
+    .priv_class     = &proresenc_class,
835
+};
... ...
@@ -21,7 +21,7 @@
21 21
 #define AVCODEC_VERSION_H
22 22
 
23 23
 #define LIBAVCODEC_VERSION_MAJOR 54
24
-#define LIBAVCODEC_VERSION_MINOR  1
24
+#define LIBAVCODEC_VERSION_MINOR  2
25 25
 #define LIBAVCODEC_VERSION_MICRO  0
26 26
 
27 27
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \