libavcodec/proresenc_kostya.c
1a265f61
 /*
  * Apple ProRes encoder
  *
  * Copyright (c) 2012 Konstantin Shishkov
  *
76e8b336
  * This encoder appears to be based on Anatoliy Wasserman's encoder,
  * considering the similarities in the bugs.
  *
d814a839
  * This file is part of FFmpeg.
1a265f61
  *
d814a839
  * FFmpeg is free software; you can redistribute it and/or
1a265f61
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
d814a839
  * FFmpeg is distributed in the hope that it will be useful,
1a265f61
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
d814a839
  * License along with FFmpeg; if not, write to the Free Software
1a265f61
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "libavutil/opt.h"
0b0953ba
 #include "libavutil/pixdesc.h"
1a265f61
 #include "avcodec.h"
a9aee08d
 #include "fdctdsp.h"
1a265f61
 #include "put_bits.h"
 #include "bytestream.h"
 #include "internal.h"
 #include "proresdata.h"
 
 /* Values compared against ProresContext.chroma_factor; CFACTOR_Y444 selects
  * full-width chroma planes, anything else is coded at half width. */
 #define CFACTOR_Y422 2
 #define CFACTOR_Y444 3
 
 /* Sizes the per-plane coefficient buffers (64 * 4 * MAX_MBS_PER_SLICE
  * entries); presumably the largest supported mbs_per_slice. */
 #define MAX_MBS_PER_SLICE 8
 
0b0953ba
 /* Upper bound on planes per slice; sizes the per-plane arrays. Plane
  * indices 0-2 go through the DCT coder, index 3 through the alpha coder. */
 #define MAX_PLANES 4
1a265f61
 
 /* Profile identifiers; the non-negative values are listed in the same
  * order as the entries of prores_profile_info[]. */
 enum {
     PRORES_PROFILE_AUTO     = -1,
     PRORES_PROFILE_PROXY    =  0,
     PRORES_PROFILE_LT       =  1,
     PRORES_PROFILE_STANDARD =  2,
     PRORES_PROFILE_HQ       =  3,
     PRORES_PROFILE_4444     =  4,
 };
 
4db4b53d
 /* Row indices into prores_quant_matrices[]. */
 enum {
     QUANT_MAT_PROXY    = 0,
     QUANT_MAT_LT       = 1,
     QUANT_MAT_STANDARD = 2,
     QUANT_MAT_HQ       = 3,
     QUANT_MAT_DEFAULT  = 4,
 };
 
 /*
  * Base quantisation matrices, one row of 64 entries (written as 8x8) per
  * QUANT_MAT_* index. The slice coders scale the selected matrix by the
  * slice quantiser (entry * quant) to obtain the actual divisors.
  */
 static const uint8_t prores_quant_matrices[][64] = {
     { // proxy (QUANT_MAT_PROXY)
          4,  7,  9, 11, 13, 14, 15, 63,
          7,  7, 11, 12, 14, 15, 63, 63,
          9, 11, 13, 14, 15, 63, 63, 63,
         11, 11, 13, 14, 63, 63, 63, 63,
         11, 13, 14, 63, 63, 63, 63, 63,
         13, 14, 63, 63, 63, 63, 63, 63,
         13, 63, 63, 63, 63, 63, 63, 63,
         63, 63, 63, 63, 63, 63, 63, 63,
     },
     { // LT (QUANT_MAT_LT)
          4,  5,  6,  7,  9, 11, 13, 15,
          5,  5,  7,  8, 11, 13, 15, 17,
          6,  7,  9, 11, 13, 15, 15, 17,
          7,  7,  9, 11, 13, 15, 17, 19,
          7,  9, 11, 13, 14, 16, 19, 23,
          9, 11, 13, 14, 16, 19, 23, 29,
          9, 11, 13, 15, 17, 21, 28, 35,
         11, 13, 16, 17, 21, 28, 35, 41,
     },
     { // standard (QUANT_MAT_STANDARD)
          4,  4,  5,  5,  6,  7,  7,  9,
          4,  4,  5,  6,  7,  7,  9,  9,
          5,  5,  6,  7,  7,  9,  9, 10,
          5,  5,  6,  7,  7,  9,  9, 10,
          5,  6,  7,  7,  8,  9, 10, 12,
          6,  7,  7,  8,  9, 10, 12, 15,
          6,  7,  7,  9, 10, 11, 14, 17,
          7,  7,  9, 10, 11, 14, 17, 21,
     },
     { // high quality (QUANT_MAT_HQ)
          4,  4,  4,  4,  4,  4,  4,  4,
          4,  4,  4,  4,  4,  4,  4,  4,
          4,  4,  4,  4,  4,  4,  4,  4,
          4,  4,  4,  4,  4,  4,  4,  5,
          4,  4,  4,  4,  4,  4,  5,  5,
          4,  4,  4,  4,  4,  5,  5,  6,
          4,  4,  4,  4,  5,  5,  6,  7,
          4,  4,  4,  4,  5,  6,  7,  7,
     },
     { // codec default (QUANT_MAT_DEFAULT): flat matrix
          4,  4,  4,  4,  4,  4,  4,  4,
          4,  4,  4,  4,  4,  4,  4,  4,
          4,  4,  4,  4,  4,  4,  4,  4,
          4,  4,  4,  4,  4,  4,  4,  4,
          4,  4,  4,  4,  4,  4,  4,  4,
          4,  4,  4,  4,  4,  4,  4,  4,
          4,  4,  4,  4,  4,  4,  4,  4,
          4,  4,  4,  4,  4,  4,  4,  4,
     },
 };
 
1a265f61
 #define NUM_MB_LIMITS 4
 /*
  * Picture-size classes expressed as a count of 16x16 macroblocks
  * (width * height / 256). Each prores_profile.br_tab[] has one entry
  * per class.
  */
 static const int prores_mb_limits[NUM_MB_LIMITS] = {
      720 *  576 / 256, // 1620
      960 *  720 / 256, // 2700
     1440 * 1080 / 256, // 6075
     2048 * 1152 / 256, // 9216
 };
 
 static const struct prores_profile {
     const char *full_name;
     uint32_t    tag;
     int         min_quant;
     int         max_quant;
     int         br_tab[NUM_MB_LIMITS];
4db4b53d
     int         quant;
0b0953ba
 } prores_profile_info[5] = {
1a265f61
     {
         .full_name = "proxy",
         .tag       = MKTAG('a', 'p', 'c', 'o'),
         .min_quant = 4,
         .max_quant = 8,
         .br_tab    = { 300, 242, 220, 194 },
4db4b53d
         .quant     = QUANT_MAT_PROXY,
1a265f61
     },
     {
         .full_name = "LT",
         .tag       = MKTAG('a', 'p', 'c', 's'),
         .min_quant = 1,
         .max_quant = 9,
         .br_tab    = { 720, 560, 490, 440 },
4db4b53d
         .quant     = QUANT_MAT_LT,
1a265f61
     },
     {
         .full_name = "standard",
         .tag       = MKTAG('a', 'p', 'c', 'n'),
         .min_quant = 1,
         .max_quant = 6,
         .br_tab    = { 1050, 808, 710, 632 },
4db4b53d
         .quant     = QUANT_MAT_STANDARD,
1a265f61
     },
     {
         .full_name = "high quality",
         .tag       = MKTAG('a', 'p', 'c', 'h'),
         .min_quant = 1,
         .max_quant = 6,
         .br_tab    = { 1566, 1216, 1070, 950 },
4db4b53d
         .quant     = QUANT_MAT_HQ,
0b0953ba
     },
     {
         .full_name = "4444",
         .tag       = MKTAG('a', 'p', '4', 'h'),
         .min_quant = 1,
         .max_quant = 6,
         .br_tab    = { 2350, 1828, 1600, 1425 },
         .quant     = QUANT_MAT_HQ,
1a265f61
     }
 };
 
 /* Number of trellis nodes reserved per slice (one per candidate quantiser). */
 #define TRELLIS_WIDTH 16
 /* Score meaning "not usable". Parenthesised so the macro stays a single
  * operand when it appears next to operators such as %, * or /. */
 #define SCORE_LIMIT   (INT_MAX / 2)
 
 /* One state of the per-row quantiser search. */
 struct TrellisNode {
     int prev_node;  // index of the best predecessor node, -1 while unset
     int quant;      // quantiser this node stands for
     int bits;       // bits accumulated along the best path to this node
     int score;      // error accumulated along that path, or SCORE_LIMIT
 };
 
6d702dc0
 /* Number of precomputed matrices in ProresContext.quants; quantisers at or
  * above this value use a matrix built on demand in custom_q. */
 #define MAX_STORED_Q 16
 
83632cbb
 /* Scratch state used by the quantiser search; find_quant_thread() picks the
  * entry ProresContext.tdata[threadnr]. */
 typedef struct ProresThreadData {
88bd7fdc
     /* coefficients (or alpha samples for plane 3) of the slice being analysed */
     DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
83632cbb
     /* 16x16 staging area for macroblocks crossing the picture edge */
     DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
     /* matrix built on demand for quantisers >= MAX_STORED_Q */
     int16_t custom_q[64];
     /* trellis storage, TRELLIS_WIDTH nodes per slice (allocated outside this view) */
     struct TrellisNode *nodes;
 } ProresThreadData;
 
1a265f61
 /* Encoder private context (reached through avctx->priv_data). */
 typedef struct ProresContext {
     AVClass *class;
88bd7fdc
     /* scratch buffers used by encode_slice() when writing the bitstream */
     DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
1a265f61
     DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
6d702dc0
     /* precomputed matrices, indexed by quantiser */
     int16_t quants[MAX_STORED_Q][64];
     /* matrix built on demand for quantisers >= MAX_STORED_Q */
     int16_t custom_q[64];
4db4b53d
     /* base matrix that is multiplied by the slice quantiser */
     const uint8_t *quant_mat;
d6acefe0
     /* coefficient scan order handed to the AC coders */
     const uint8_t *scantable;
1a265f61
 
a9aee08d
     /* forward transform of one 8x8 block, called from get_slice_data() */
     void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
3fd22538
                  ptrdiff_t linesize, int16_t *block);
a9aee08d
     FDCTDSPContext fdsp;
1a265f61
 
05fb4c9a
     /* frame currently being encoded, set at the start of encode_frame() */
     const AVFrame *pic;
1a265f61
     int mb_width, mb_height;
     int mbs_per_slice;
     int num_chroma_blocks, chroma_factor;
     /* number of slices in one macroblock row */
     int slices_width;
c8e186fa
     int slices_per_picture;
     int pictures_per_frame; // 1 for progressive, 2 for interlaced
     /* index of the picture (field) being coded within the frame */
     int cur_picture_idx;
1a265f61
     int num_planes;
     /* bit budget per macroblock used by the quantiser search */
     int bits_per_mb;
6acac061
     /* when non-zero, encode_slice() always uses quants[0] */
     int force_quant;
0b0953ba
     /* alpha sample depth handed to the alpha coder; 0 disables alpha estimation */
     int alpha_bits;
45ce880a
     int warn;
1a265f61
 
4db4b53d
     /* four bytes copied into the frame header */
     char *vendor;
     int quant_sel;
 
c8e186fa
     /* size used when allocating the output packet */
     int frame_size_upper_bound;
6d702dc0
 
1a265f61
     int profile;
     const struct prores_profile *profile_info;
 
     /* quantiser chosen for every slice, filled in by find_quant_thread() */
     int *slice_q;
83632cbb
 
     /* per-thread scratch data, indexed by thread number */
     ProresThreadData *tdata;
1a265f61
 } ProresContext;
 
 /**
  * Load one slice of a plane and run the forward DCT on each 8x8 block.
  *
  * Macroblocks that cross the right or bottom picture edge are first copied
  * into emu_buf with the last valid column/row replicated. Macroblocks that
  * start at or beyond the right edge produce all-zero coefficients.
  *
  * @param ctx           encoder context supplying the fdct callback
  * @param src           first sample of the slice inside the plane
  * @param linesize      distance between source rows, in bytes
  * @param x             horizontal position of the slice, in samples
  * @param y             vertical position of the slice, in samples
  * @param w             plane width in samples
  * @param h             plane height in samples
  * @param blocks        output, 64 coefficients per 8x8 block
  * @param emu_buf       16x16 scratch area for edge macroblocks
  * @param mbs_per_slice number of macroblocks to process
  * @param blocks_per_mb 8x8 blocks per macroblock; more than 2 means the
  *                      macroblock is two blocks wide
  * @param is_chroma     selects the order in which the blocks are emitted
  */
 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
                            ptrdiff_t linesize, int x, int y, int w, int h,
                            int16_t *blocks, uint16_t *emu_buf,
                            int mbs_per_slice, int blocks_per_mb, int is_chroma)
 {
     const int mb_width   = 4 * blocks_per_mb;
     const int two_across = blocks_per_mb > 2;
     int mb;
 
     for (mb = 0; mb < mbs_per_slice; mb++) {
         const uint16_t *esrc;
         ptrdiff_t elinesize;
 
         /* nothing left inside the picture: zero the remaining blocks */
         if (x >= w) {
             memset(blocks, 0, 64 * (mbs_per_slice - mb) * blocks_per_mb
                               * sizeof(*blocks));
             return;
         }
 
         if (x + mb_width > w || y + 16 > h) {
             /* partial macroblock: build a padded copy in emu_buf */
             const int bw = FFMIN(w - x, mb_width);
             const int bh = FFMIN(h - y, 16);
             int row, col, pix;
 
             for (row = 0; row < bh; row++) {
                 uint16_t *dst = emu_buf + row * 16;
 
                 memcpy(dst, (const uint8_t*)src + row * linesize,
                        bw * sizeof(*src));
                 /* repeat the last valid sample to the right */
                 pix = dst[bw - 1];
                 for (col = bw; col < mb_width; col++)
                     dst[col] = pix;
             }
             /* repeat the last valid row downwards */
             for (; row < 16; row++)
                 memcpy(emu_buf + row * 16,
                        emu_buf + (bh - 1) * 16,
                        mb_width * sizeof(*emu_buf));
 
             esrc      = emu_buf;
             elinesize = 16 * sizeof(*emu_buf);
         } else {
             esrc      = src;
             elinesize = linesize;
         }
 
         /*
          * elinesize counts bytes while esrc points at 16-bit samples, so
          * "esrc + elinesize * 4" is eight rows further down.
          */
         ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
         blocks += 64;
         if (is_chroma) {
             /* chroma order: left column first, then the right column */
             ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
             blocks += 64;
             if (two_across) {
                 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
                 blocks += 64;
                 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
                 blocks += 64;
             }
         } else {
             /* luma order: top row first, then the bottom row */
             if (two_across) {
                 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
                 blocks += 64;
             }
             ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
             blocks += 64;
             if (two_across) {
                 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
                 blocks += 64;
             }
         }
 
         x   += mb_width;
         src += mb_width;
     }
 }
 
0b0953ba
 /**
  * Copy the alpha samples of one slice into blocks, rescaled to abits bits.
  *
  * The output always holds 16 rows of 16 * mbs_per_slice samples; the last
  * valid column and row are replicated where the slice crosses the picture
  * edge.
  *
  * @param ctx           unused
  * @param src           first alpha sample of the slice
  * @param linesize      distance between source rows, in bytes
  * @param x             horizontal position of the slice, in samples
  * @param y             vertical position of the slice, in samples
  * @param w             plane width in samples
  * @param h             plane height in samples
  * @param blocks        output sample array
  * @param mbs_per_slice slice width in macroblocks
  * @param abits         8 shifts each sample right by two bits; any other
  *                      value expands it with (v << 6) | (v >> 4)
  */
 static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
                            ptrdiff_t linesize, int x, int y, int w, int h,
                            int16_t *blocks, int mbs_per_slice, int abits)
 {
     const int slice_width = 16 * mbs_per_slice;
     const int copy_w      = FFMIN(w - x, slice_width);
     const int copy_h      = FFMIN(h - y, 16);
     int row, col;
 
     for (row = 0; row < copy_h; row++) {
         memcpy(blocks, src, copy_w * sizeof(*src));
         for (col = 0; col < copy_w; col++) {
             if (abits == 8)
                 blocks[col] >>= 2;
             else
                 blocks[col] = (blocks[col] << 6) | (blocks[col] >> 4);
         }
         /* pad to the full slice width with the last valid sample */
         for (col = copy_w; col < slice_width; col++)
             blocks[col] = blocks[copy_w - 1];
 
         blocks += slice_width;
         src    += linesize >> 1;
     }
 
     /* pad to 16 rows by repeating the row written just before */
     for (; row < 16; row++) {
         memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
         blocks += slice_width;
     }
 }
 
1a265f61
 /**
  * Write an unsigned rice/exp golomb codeword.
  */
b5696ff2
 /*
  * The codebook byte packs three fields: bits 0-1 hold (switch_bits - 1),
  * bits 2-4 the exp-Golomb order and bits 5 and up the Rice order. Values
  * below (switch_bits << rice_order) are Rice coded, the rest exp-Golomb.
  */
 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
 {
     /* number of prefix bits to switch between Rice and expGolomb */
     const unsigned int switch_bits = (codebook & 3) + 1;
     const unsigned int rice_order  =  codebook >> 5;
     const unsigned int exp_order   = (codebook >> 2) & 7;
     const unsigned int switch_val  = switch_bits << rice_order;
     int exponent;
 
     if (val < switch_val) {
         /* Rice code: unary quotient, stop bit, then the low bits */
         exponent = val >> rice_order;
         if (exponent)
             put_bits(pb, exponent, 0);
         put_bits(pb, 1, 1);
         if (rice_order)
             put_sbits(pb, rice_order, val);
         return;
     }
 
     /* exp-Golomb code: zero prefix followed by the rebased value */
     val -= switch_val - (1 << exp_order);
     exponent = av_log2(val);
 
     put_bits(pb, exponent - exp_order + switch_bits, 0);
     put_bits(pb, exponent + 1, val);
 }
 
 /* Sign mask of a 32-bit int: 0 for non-negative values, -1 for negative
  * ones (relies on an arithmetic right shift). */
 #define GET_SIGN(x)  ((x) >> 31)
 /* Fold a signed value into a non-negative code: 0, -1, 1, -2, 2, ... map to
  * 0, 1, 2, 3, 4, ... Multiplying by 2 replaces "<< 1" because
  * left-shifting a negative int is undefined behaviour. */
 #define MAKE_CODE(x) (((x) * 2) ^ GET_SIGN(x))
 
88bd7fdc
 /**
  * Write the DC coefficients of all blocks of a slice plane.
  *
  * The first DC is coded directly with FIRST_DC_CB. Each later one is coded
  * as the difference from the previous DC, sign-flipped when the previous
  * difference was negative, using a codebook chosen from the previous code.
  *
  * @param pb               bitstream writer
  * @param blocks           coefficients, 64 per block, DC first
  * @param blocks_per_slice number of blocks
  * @param scale            DC divisor
  */
 static void encode_dcs(PutBitContext *pb, int16_t *blocks,
                        int blocks_per_slice, int scale)
 {
     int prev_dc   = (blocks[0] - 0x4000) / scale;
     int prev_sign = 0;
     int codebook  = 3;
     int n;
 
     encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
 
     for (n = 1; n < blocks_per_slice; n++) {
         const int dc       = (blocks[n * 64] - 0x4000) / scale;
         const int diff     = dc - prev_dc;
         const int adjusted = (diff ^ prev_sign) - prev_sign;
         const int code     = MAKE_CODE(adjusted);
 
         encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
 
         /* next codebook depends on the size of this code, capped at 3 */
         codebook  = FFMIN((code + (code & 1)) >> 1, 3);
         prev_sign = GET_SIGN(diff);
         prev_dc   = dc;
     }
 }
 
88bd7fdc
 /**
  * Write the AC coefficients of a slice plane as (run, level, sign) triples.
  *
  * Coefficients are visited position by position in scan order, and for each
  * position across all blocks of the slice. Run and level codebooks adapt to
  * the previously coded run and level.
  *
  * @param pb                bitstream writer
  * @param blocks            coefficients, 64 per block
  * @param blocks_per_slice  number of blocks
  * @param plane_size_factor unused
  * @param scan              scan order (entry 0 is the DC and is skipped)
  * @param qmat              per-coefficient divisors
  */
 static void encode_acs(PutBitContext *pb, int16_t *blocks,
                        int blocks_per_slice,
                        int plane_size_factor,
                        const uint8_t *scan, const int16_t *qmat)
 {
     int run_cb = ff_prores_run_to_cb_index[4];
     int lev_cb = ff_prores_lev_to_cb_index[2];
     int run    = 0;
     int pos, blk;
 
     for (pos = 1; pos < 64; pos++) {
         const int coef    = scan[pos];
         const int divisor = qmat[coef];
 
         for (blk = 0; blk < blocks_per_slice; blk++) {
             const int level = blocks[coef + blk * 64] / divisor;
             int abs_level;
 
             if (!level) {
                 run++;
                 continue;
             }
 
             abs_level = FFABS(level);
             encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
             encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
                                 abs_level - 1);
             put_sbits(pb, 1, GET_SIGN(level));
 
             run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
             lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
             run    = 0;
         }
     }
 }
 
 /**
  * Write the DC and AC data of one DCT-coded plane of a slice.
  *
  * The writer is flushed afterwards, so the plane ends on a byte boundary.
  * src and linesize are not used here.
  *
  * @return number of bytes written for this plane
  */
 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
                               const uint16_t *src, ptrdiff_t linesize,
                               int mbs_per_slice, int16_t *blocks,
                               int blocks_per_mb, int plane_size_factor,
                               const int16_t *qmat)
 {
     const int start_bits = put_bits_count(pb);
     const int num_blocks = mbs_per_slice * blocks_per_mb;
 
     encode_dcs(pb, blocks, num_blocks, qmat[0]);
     encode_acs(pb, blocks, num_blocks, plane_size_factor,
                ctx->scantable, qmat);
     flush_put_bits(pb);
 
     return (put_bits_count(pb) - start_bits) >> 3;
 }
 
0b0953ba
 /**
  * Write the difference between two consecutive alpha values.
  *
  * Small non-zero differences use a short form: a 0 flag, the magnitude
  * minus one in dbits - 1 bits, then a sign bit. Everything else is written
  * as a 1 flag followed by the difference in abits bits.
  *
  * @param pb    bitstream writer
  * @param cur   current alpha value
  * @param prev  previous alpha value
  * @param abits alpha depth in bits (8 selects 4-bit short differences,
  *              any other value 7-bit ones)
  */
 static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
 {
     const int dbits = (abits == 8) ? 4 : 7;
     const int dsize = 1 << (dbits - 1);
     int diff = av_mod_uintp2(cur - prev, abits);
 
     /* map the upper end of the modular range onto negative differences */
     if (diff >= (1 << abits) - dsize)
         diff -= 1 << abits;
 
     if (diff && diff >= -dsize && diff <= dsize) {
         put_bits(pb, 1, 0);
         put_bits(pb, dbits - 1, FFABS(diff) - 1);
         put_bits(pb, 1, diff < 0);
     } else {
         put_bits(pb, 1, 1);
         put_bits(pb, abits, diff);
     }
 }
 
 /**
  * Write the length of a run of repeated alpha values.
  *
  * A zero run is a single 1 bit. Any other run is a 0 bit followed by the
  * length in 4 bits (below 16) or 15 bits.
  */
 static void put_alpha_run(PutBitContext *pb, int run)
 {
     if (!run) {
         put_bits(pb, 1, 1);
         return;
     }
 
     put_bits(pb, 1, 0);
     put_bits(pb, run < 0x10 ? 4 : 15, run);
 }
 
 // todo alpha quantisation for high quants
 /**
  * Write the alpha samples of one slice as run-length coded differences.
  *
  * The first sample is coded against the all-ones value of ctx->alpha_bits
  * bits. After that, every change of value emits the length of the
  * preceding run followed by the difference. A trailing non-zero run is
  * written at the end. quant is not used.
  *
  * @return number of bytes written
  */
 static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
                               int mbs_per_slice, uint16_t *blocks,
                               int quant)
 {
     const int abits       = ctx->alpha_bits;
     const int num_samples = mbs_per_slice * 256;
     const int start_bits  = put_bits_count(pb);
     int last = (1 << abits) - 1;
     int run  = 0;
     int pos  = 1;
     int sample;
 
     sample = blocks[0];
     put_alpha_diff(pb, sample, last, abits);
     last = sample;
 
     do {
         sample = blocks[pos];
         if (sample == last) {
             run++;
         } else {
             put_alpha_run (pb, run);
             put_alpha_diff(pb, sample, last, abits);
             last = sample;
             run  = 0;
         }
     } while (++pos < num_samples);
 
     if (run)
         put_alpha_run(pb, run);
     flush_put_bits(pb);
 
     return (put_bits_count(pb) - start_bits) >> 3;
 }
 
1a265f61
 /**
  * Encode all planes of one slice into the bitstream.
  *
  * @param avctx         codec context (priv_data is the ProresContext)
  * @param pic           source frame
  * @param pb            bitstream writer positioned at the slice payload
  * @param sizes         receives the coded size in bytes of every plane
  * @param x             slice position in macroblocks, horizontal
  * @param y             slice position in macroblocks, vertical
  * @param quant         quantiser selected for this slice
  * @param mbs_per_slice slice width in macroblocks
  * @return total number of bytes written, or AVERROR_BUG when the writer ran
  *         past the end of its buffer
  */
 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
                         PutBitContext *pb,
                         int sizes[4], int x, int y, int quant,
                         int mbs_per_slice)
 {
     ProresContext *ctx = avctx->priv_data;
     int i, xp, yp;
     int total_size = 0;
     const uint16_t *src;
     int slice_width_factor = av_log2(mbs_per_slice);
     int num_cblocks, pwidth, line_add;
     ptrdiff_t linesize;
     int plane_factor, is_chroma;
     /* the matrices are int16_t and are consumed as const int16_t * */
     int16_t *qmat;
 
     /* for two pictures per frame, pick the line offset of the current one */
     if (ctx->pictures_per_frame == 1)
         line_add = 0;
     else
         line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
 
     if (ctx->force_quant) {
         qmat = ctx->quants[0];
     } else if (quant < MAX_STORED_Q) {
         qmat = ctx->quants[quant];
     } else {
         /* quantiser beyond the precomputed range: build the matrix now */
         qmat = ctx->custom_q;
         for (i = 0; i < 64; i++)
             qmat[i] = ctx->quant_mat[i] * quant;
     }
 
     for (i = 0; i < ctx->num_planes; i++) {
         is_chroma    = (i == 1 || i == 2);
         plane_factor = slice_width_factor + 2;
         if (is_chroma)
             plane_factor += ctx->chroma_factor - 3;
         if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
             xp          = x << 4;
             yp          = y << 4;
             num_cblocks = 4;
             pwidth      = avctx->width;
         } else {
             /* horizontally subsampled chroma: half-width macroblocks */
             xp          = x << 3;
             yp          = y << 4;
             num_cblocks = 2;
             pwidth      = avctx->width >> 1;
         }
 
         linesize = pic->linesize[i] * ctx->pictures_per_frame;
         src = (const uint16_t*)(pic->data[i] + yp * linesize +
                                 line_add * pic->linesize[i]) + xp;
 
         if (i < 3) {
             get_slice_data(ctx, src, linesize, xp, yp,
                            pwidth, avctx->height / ctx->pictures_per_frame,
                            ctx->blocks[0], ctx->emu_buf,
                            mbs_per_slice, num_cblocks, is_chroma);
             sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
                                           mbs_per_slice, ctx->blocks[0],
                                           num_cblocks, plane_factor,
                                           qmat);
         } else {
             get_alpha_data(ctx, src, linesize, xp, yp,
                            pwidth, avctx->height / ctx->pictures_per_frame,
                            ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
             /* the alpha coder reads the same storage as unsigned samples */
             sizes[i] = encode_alpha_plane(ctx, pb, mbs_per_slice,
                                           (uint16_t *)ctx->blocks[0], quant);
         }
         total_size += sizes[i];
         if (put_bits_left(pb) < 0) {
             av_log(avctx, AV_LOG_ERROR,
                    "Underestimated required buffer size.\n");
             return AVERROR_BUG;
         }
     }
     return total_size;
 }
 
b5696ff2
 /**
  * Return the number of bits encode_vlc_codeword() writes for val with the
  * given codebook, without writing anything.
  */
 static inline int estimate_vlc(unsigned codebook, int val)
 {
     /* number of prefix bits to switch between Rice and expGolomb */
     const unsigned int switch_bits = (codebook & 3) + 1;
     const unsigned int rice_order  =  codebook >> 5;
     const unsigned int exp_order   = (codebook >> 2) & 7;
     const unsigned int switch_val  = switch_bits << rice_order;
     int exponent;
 
     /* Rice code: unary quotient, stop bit and rice_order low bits */
     if (val < switch_val)
         return (val >> rice_order) + rice_order + 1;
 
     /* exp-Golomb code: zero prefix plus exponent + 1 value bits */
     val -= switch_val - (1 << exp_order);
     exponent = av_log2(val);
 
     return exponent * 2 - exp_order + switch_bits + 1;
 }
 
88bd7fdc
 static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
1a265f61
                         int scale)
 {
     int i;
     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
     int bits;
 
     prev_dc  = (blocks[0] - 0x4000) / scale;
     bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
17b11ffe
     sign     = 0;
1a265f61
     codebook = 3;
     blocks  += 64;
     *error  += FFABS(blocks[0] - 0x4000) % scale;
 
     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
         dc       = (blocks[0] - 0x4000) / scale;
         *error  += FFABS(blocks[0] - 0x4000) % scale;
         delta    = dc - prev_dc;
         new_sign = GET_SIGN(delta);
         delta    = (delta ^ sign) - sign;
         code     = MAKE_CODE(delta);
         bits    += estimate_vlc(ff_prores_dc_codebook[codebook], code);
         codebook = (code + (code & 1)) >> 1;
         codebook = FFMIN(codebook, 3);
         sign     = new_sign;
         prev_dc  = dc;
     }
 
     return bits;
 }
 
88bd7fdc
 static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
1a265f61
                         int plane_size_factor,
                         const uint8_t *scan, const int16_t *qmat)
 {
     int idx, i;
     int run, level, run_cb, lev_cb;
     int max_coeffs, abs_level;
     int bits = 0;
 
     max_coeffs = blocks_per_slice << 6;
     run_cb     = ff_prores_run_to_cb_index[4];
     lev_cb     = ff_prores_lev_to_cb_index[2];
     run        = 0;
 
     for (i = 1; i < 64; i++) {
         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
             level   = blocks[idx] / qmat[scan[i]];
             *error += FFABS(blocks[idx]) % qmat[scan[i]];
             if (level) {
                 abs_level = FFABS(level);
                 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
                 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
                                      abs_level - 1) + 1;
 
                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
                 run    = 0;
             } else {
                 run++;
             }
         }
     }
 
     return bits;
 }
 
 /**
  * Estimate the coded size of one DCT plane of a slice from the
  * coefficients stored in td->blocks[plane].
  *
  * src and linesize are not used here.
  *
  * @param error running error sum, increased by the plane's quantisation error
  * @return estimated size in bits, rounded up to a whole byte
  */
 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
                                 const uint16_t *src, ptrdiff_t linesize,
                                 int mbs_per_slice,
                                 int blocks_per_mb, int plane_size_factor,
                                 const int16_t *qmat, ProresThreadData *td)
 {
     const int num_blocks = mbs_per_slice * blocks_per_mb;
     int16_t *coeffs      = td->blocks[plane];
     int bits;
 
     bits  = estimate_dcs(error, coeffs, num_blocks, qmat[0]);
     bits += estimate_acs(error, coeffs, num_blocks,
                          plane_size_factor, ctx->scantable, qmat);
 
     return FFALIGN(bits, 8);
 }
 
0b0953ba
 /**
  * Return the number of bits put_alpha_diff() writes for the step from
  * prev to cur: dbits + 1 for a small non-zero difference, abits + 1
  * otherwise.
  */
 static int est_alpha_diff(int cur, int prev, int abits)
 {
     const int dbits = (abits == 8) ? 4 : 7;
     const int dsize = 1 << (dbits - 1);
     int diff = av_mod_uintp2(cur - prev, abits);
 
     /* map the upper end of the modular range onto negative differences */
     if (diff >= (1 << abits) - dsize)
         diff -= 1 << abits;
 
     if (diff && diff >= -dsize && diff <= dsize)
         return dbits + 1;
     return abits + 1;
 }
 
 /**
  * Estimate the coded size of the alpha samples of one slice.
  *
  * Follows encode_alpha_plane() step by step, counting the bits that
  * put_alpha_diff() and put_alpha_run() would write.
  *
  * @param ctx           encoder context (supplies alpha_bits)
  * @param error         running error sum of the slice. It is left untouched,
  *                      because this function does no quantisation and must
  *                      not discard the error already accumulated for the
  *                      other planes.
  * @param src           unused
  * @param linesize      unused
  * @param mbs_per_slice slice width in macroblocks
  * @param quant         unused
  * @param blocks        alpha samples prepared by get_alpha_data()
  * @return estimated size in bits
  */
 static int estimate_alpha_plane(ProresContext *ctx, int *error,
                                 const uint16_t *src, ptrdiff_t linesize,
                                 int mbs_per_slice, int quant,
                                 int16_t *blocks)
 {
     const int abits = ctx->alpha_bits;
     const int mask  = (1 << abits) - 1;
     const int num_coeffs = mbs_per_slice * 256;
     int prev = mask, cur;
     int idx = 0;
     int run = 0;
     int bits;
 
     cur = blocks[idx++];
     bits = est_alpha_diff(cur, prev, abits);
     prev = cur;
     do {
         cur = blocks[idx++];
         if (cur != prev) {
             /* put_alpha_run() writes a flag bit and, for non-zero runs,
              * a 4- or 15-bit length: 1, 5 or 16 bits in total */
             if (!run)
                 bits++;
             else if (run < 0x10)
                 bits += 5;
             else
                 bits += 16;
             bits += est_alpha_diff(cur, prev, abits);
             prev = cur;
             run  = 0;
         } else {
             run++;
         }
     } while (idx < num_coeffs);
 
     /* trailing run, written only when non-zero */
     if (run) {
         if (run < 0x10)
             bits += 5;
         else
             bits += 16;
     }
 
     return bits;
 }
 
05fb4c9a
 /**
  * Evaluate the candidate quantisers for one slice and extend the trellis
  * of the current macroblock row by one column.
  *
  * The slice is transformed once into td->blocks. Its size and error are
  * then estimated for every quantiser in [min_quant, max_quant], plus one
  * "overquant" candidate that is raised until the slice fits its bit
  * budget. Each candidate node is linked to the best node of the previous
  * column.
  *
  * @param avctx         codec context (priv_data is the ProresContext)
  * @param trellis_node  index of the first node of this slice's column; the
  *                      previous column starts TRELLIS_WIDTH nodes earlier
  * @param x             slice position in macroblocks, horizontal
  * @param y             slice position in macroblocks, vertical
  * @param mbs_per_slice slice width in macroblocks
  * @param td            per-thread scratch data holding the trellis
  * @return index of the lowest-score node in this slice's column
  */
 static int find_slice_quant(AVCodecContext *avctx,
                             int trellis_node, int x, int y, int mbs_per_slice,
                             ProresThreadData *td)
 {
     ProresContext *ctx = avctx->priv_data;
     int i, q, pq, xp, yp;
     const uint16_t *src;
     int slice_width_factor = av_log2(mbs_per_slice);
     int num_cblocks[MAX_PLANES], pwidth;
     int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
     const int min_quant = ctx->profile_info->min_quant;
     const int max_quant = ctx->profile_info->max_quant;
     int error, bits, bits_limit;
     int mbs, prev, cur, new_score;
     int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
     int overquant;
     /* the matrices are int16_t and are consumed as const int16_t * */
     int16_t *qmat;
     int linesize[4], line_add;
 
     /* for two pictures per frame, pick the line offset of the current one */
     if (ctx->pictures_per_frame == 1)
         line_add = 0;
     else
         line_add = ctx->cur_picture_idx ^ !ctx->pic->top_field_first;
     mbs = x + mbs_per_slice;
 
     /* transform (or copy, for alpha) every plane of the slice once */
     for (i = 0; i < ctx->num_planes; i++) {
         is_chroma[i]    = (i == 1 || i == 2);
         plane_factor[i] = slice_width_factor + 2;
         if (is_chroma[i])
             plane_factor[i] += ctx->chroma_factor - 3;
         if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
             xp             = x << 4;
             yp             = y << 4;
             num_cblocks[i] = 4;
             pwidth         = avctx->width;
         } else {
             xp             = x << 3;
             yp             = y << 4;
             num_cblocks[i] = 2;
             pwidth         = avctx->width >> 1;
         }
 
         linesize[i] = ctx->pic->linesize[i] * ctx->pictures_per_frame;
         src = (const uint16_t *)(ctx->pic->data[i] + yp * linesize[i] +
                                  line_add * ctx->pic->linesize[i]) + xp;
 
         if (i < 3) {
             get_slice_data(ctx, src, linesize[i], xp, yp,
                            pwidth, avctx->height / ctx->pictures_per_frame,
                            td->blocks[i], td->emu_buf,
                            mbs_per_slice, num_cblocks[i], is_chroma[i]);
         } else {
             get_alpha_data(ctx, src, linesize[i], xp, yp,
                            pwidth, avctx->height / ctx->pictures_per_frame,
                            td->blocks[i], mbs_per_slice, ctx->alpha_bits);
         }
     }
 
     /* reset this column; slot max_quant + 1 is the overquant candidate */
     for (q = min_quant; q < max_quant + 2; q++) {
         td->nodes[trellis_node + q].prev_node = -1;
         td->nodes[trellis_node + q].quant     = q;
     }
 
     // todo: maybe perform coarser quantising to fit into frame size when needed
     /* src still points at the last plane; the estimators do not read it */
     for (q = min_quant; q <= max_quant; q++) {
         bits  = 0;
         error = 0;
         for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
             bits += estimate_slice_plane(ctx, &error, i,
                                          src, linesize[i],
                                          mbs_per_slice,
                                          num_cblocks[i], plane_factor[i],
                                          ctx->quants[q], td);
         }
         if (ctx->alpha_bits)
             bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
                                          mbs_per_slice, q, td->blocks[3]);
         /* slices above 65000 bytes are never selectable */
         if (bits > 65000 * 8)
             error = SCORE_LIMIT;
 
         slice_bits[q]  = bits;
         slice_score[q] = error;
     }
     if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
         /* max_quant already fits: the extra slot repeats it, scored one worse */
         slice_bits[max_quant + 1]  = slice_bits[max_quant];
         slice_score[max_quant + 1] = slice_score[max_quant] + 1;
         overquant = max_quant;
     } else {
         /* raise the quantiser until the slice fits its budget */
         for (q = max_quant + 1; q < 128; q++) {
             bits  = 0;
             error = 0;
             if (q < MAX_STORED_Q) {
                 qmat = ctx->quants[q];
             } else {
                 qmat = td->custom_q;
                 for (i = 0; i < 64; i++)
                     qmat[i] = ctx->quant_mat[i] * q;
             }
             for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
                 bits += estimate_slice_plane(ctx, &error, i,
                                              src, linesize[i],
                                              mbs_per_slice,
                                              num_cblocks[i], plane_factor[i],
                                              qmat, td);
             }
             if (ctx->alpha_bits)
                 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
                                              mbs_per_slice, q, td->blocks[3]);
             if (bits <= ctx->bits_per_mb * mbs_per_slice)
                 break;
         }
 
         slice_bits[max_quant + 1]  = bits;
         slice_score[max_quant + 1] = error;
         overquant = q;
     }
     td->nodes[trellis_node + max_quant + 1].quant = overquant;
 
     /* link every candidate to the cheapest node of the previous column,
      * within the bit budget accumulated up to this slice */
     bits_limit = mbs * ctx->bits_per_mb;
     for (pq = min_quant; pq < max_quant + 2; pq++) {
         prev = trellis_node - TRELLIS_WIDTH + pq;
 
         for (q = min_quant; q < max_quant + 2; q++) {
             cur = trellis_node + q;
 
             bits  = td->nodes[prev].bits + slice_bits[q];
             error = slice_score[q];
             if (bits > bits_limit)
                 error = SCORE_LIMIT;
 
             if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
                 new_score = td->nodes[prev].score + error;
             else
                 new_score = SCORE_LIMIT;
             if (td->nodes[cur].prev_node == -1 ||
                 td->nodes[cur].score >= new_score) {
 
                 td->nodes[cur].bits      = bits;
                 td->nodes[cur].score     = new_score;
                 td->nodes[cur].prev_node = prev;
             }
         }
     }
 
     /* pick the node with the lowest score; ties go to the higher quantiser */
     error = td->nodes[trellis_node + min_quant].score;
     pq    = trellis_node + min_quant;
     for (q = min_quant + 1; q < max_quant + 2; q++) {
         if (td->nodes[trellis_node + q].score <= error) {
             error = td->nodes[trellis_node + q].score;
             pq    = trellis_node + q;
         }
     }
 
     return pq;
 }
 
83632cbb
 /**
  * Worker that chooses the quantisers for one row of slices.
  *
  * Runs find_slice_quant() on every slice of macroblock row jobnr, then
  * walks the trellis backwards from the best final node and stores the
  * quantiser of every slice in ctx->slice_q.
  *
  * @param avctx    codec context (priv_data is the ProresContext)
  * @param arg      unused
  * @param jobnr    macroblock row to process
  * @param threadnr index of the ProresThreadData entry to use
  * @return always 0
  */
 static int find_quant_thread(AVCodecContext *avctx, void *arg,
                              int jobnr, int threadnr)
 {
     ProresContext *ctx   = avctx->priv_data;
     ProresThreadData *td = &ctx->tdata[threadnr];
     int *row_q           = ctx->slice_q + jobnr * ctx->slices_width;
     int slice_mbs        = ctx->mbs_per_slice;
     int node             = 0;
     int mb_x             = 0;
     int slice            = 0;
 
     while (mb_x < ctx->mb_width) {
         /* halve the slice width until it fits the rest of the row */
         while (ctx->mb_width - mb_x < slice_mbs)
             slice_mbs >>= 1;
         node = find_slice_quant(avctx,
                                 (slice + 1) * TRELLIS_WIDTH, mb_x, jobnr,
                                 slice_mbs, td);
         mb_x += slice_mbs;
         slice++;
     }
 
     /* backtrack from the last slice to the first */
     for (slice = ctx->slices_width - 1; slice >= 0; slice--) {
         row_q[slice] = td->nodes[node].quant;
         node         = td->nodes[node].prev_node;
     }
 
     return 0;
 }
 
1a265f61
 /**
  * Encode one input frame into a ProRes frame atom stored in pkt.
  *
  * Output layout, in write order:
  *   - 4-byte frame size (patched at the end) and the FRAME_ID tag
  *   - frame header (size patched once the header is complete)
  *   - for each picture (ctx->pictures_per_frame of them):
  *       picture header, a seek table with one 16-bit size per slice,
  *       then the slices themselves
  *
  * Unless a fixed quantiser is forced, the per-slice quantisers are computed
  * first by running find_quant_thread() once per macroblock row.
  *
  * The packet is allocated from ctx->frame_size_upper_bound and is grown on
  * the fly if the remaining room drops below two maximum-size slices.
  *
  * @param avctx      codec context, priv_data is the ProresContext
  * @param pkt        output packet, allocated here
  * @param pic        frame to encode
  * @param got_packet set to 1 on success
  * @return 0 on success, otherwise the error returned by packet
  *         allocation/growth, the quantiser search or encode_slice()
  */
 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                         const AVFrame *pic, int *got_packet)
 {
     ProresContext *ctx = avctx->priv_data;
     uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
     uint8_t *picture_size_pos;
     PutBitContext pb;
     int x, y, i, mb, q = 0;
     int sizes[4] = { 0 };
     // one byte for the header size, one for the quantiser, then a 16-bit
     // size for every plane except the last
     int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
     int frame_size, picture_size, slice_size;
76a8cb9d
     int pkt_size, ret;
f0e51be8
     // initial per-slice estimate: the inverse of the upper-bound formula;
     // raised below whenever a larger slice is actually produced
     int max_slice_size = (ctx->frame_size_upper_bound - 200) / (ctx->pictures_per_frame * ctx->slices_per_picture + 1);
c8e186fa
     uint8_t frame_flags;
1a265f61
 
05fb4c9a
     ctx->pic = pic;
52b81ff4
     pkt_size = ctx->frame_size_upper_bound;
1a265f61
 
29d147c9
     // AV_INPUT_BUFFER_MIN_SIZE extra bytes are requested on top of pkt_size
     if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
1a265f61
         return ret;
 
     orig_buf = pkt->data;
 
     // frame atom
     orig_buf += 4;                              // frame size
     bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
     buf = orig_buf;
 
     // frame header
     tmp = buf;
     buf += 2;                                   // frame header size will be stored here
     bytestream_put_be16  (&buf, 0);             // version 1
4db4b53d
     bytestream_put_buffer(&buf, ctx->vendor, 4);
1a265f61
     bytestream_put_be16  (&buf, avctx->width);
     bytestream_put_be16  (&buf, avctx->height);
c8e186fa
 
     // chroma format in the top two bits; for interlaced coding the field
     // order is signalled as 0x04 (top field first) or 0x08
     frame_flags = ctx->chroma_factor << 6;
7c6eb0a1
     if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT)
c8e186fa
         frame_flags |= pic->top_field_first ? 0x04 : 0x08;
     bytestream_put_byte  (&buf, frame_flags);
 
1a265f61
     bytestream_put_byte  (&buf, 0);             // reserved
12b812d2
     bytestream_put_byte  (&buf, avctx->color_primaries);
     bytestream_put_byte  (&buf, avctx->color_trc);
     bytestream_put_byte  (&buf, avctx->colorspace);
0b0953ba
     // low bits carry alpha_bits / 8, i.e. 0, 1 or 2
     bytestream_put_byte  (&buf, 0x40 | (ctx->alpha_bits >> 3));
1a265f61
     bytestream_put_byte  (&buf, 0);             // reserved
4db4b53d
     if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
         bytestream_put_byte  (&buf, 0x03);      // matrix flags - both matrices are present
         // luma quantisation matrix
         for (i = 0; i < 64; i++)
             bytestream_put_byte(&buf, ctx->quant_mat[i]);
         // chroma quantisation matrix (the same table as for luma is written)
         for (i = 0; i < 64; i++)
             bytestream_put_byte(&buf, ctx->quant_mat[i]);
     } else {
         bytestream_put_byte  (&buf, 0x00);      // matrix flags - default matrices are used
     }
1a265f61
     bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size
 
c0f4cf77
     for (ctx->cur_picture_idx = 0;
          ctx->cur_picture_idx < ctx->pictures_per_frame;
          ctx->cur_picture_idx++) {
c8e186fa
         // picture header
         picture_size_pos = buf + 1;
         bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
         buf += 4;                                   // picture data size will be stored here
         bytestream_put_be16  (&buf, ctx->slices_per_picture);
         bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
 
         // seek table - will be filled during slice encoding
         slice_sizes = buf;
         buf += ctx->slices_per_picture * 2;
 
         // slices
         if (!ctx->force_quant) {
da2a49ac
             // one job per macroblock row; the results end up in ctx->slice_q
             ret = avctx->execute2(avctx, find_quant_thread, (void*)pic, NULL,
c8e186fa
                                   ctx->mb_height);
             if (ret)
                 return ret;
         }
1a265f61
 
c8e186fa
         for (y = 0; y < ctx->mb_height; y++) {
             int mbs_per_slice = ctx->mbs_per_slice;
             for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
                 q = ctx->force_quant ? ctx->force_quant
                                      : ctx->slice_q[mb + y * ctx->slices_width];
 
                 // halve the slice width until it fits into the rest of the row
                 while (ctx->mb_width - x < mbs_per_slice)
                     mbs_per_slice >>= 1;
 
                 // slice header: size in bits now, quantiser and plane sizes
                 // are filled in through slice_hdr after the slice is coded
                 bytestream_put_byte(&buf, slice_hdr_size << 3);
                 slice_hdr = buf;
                 buf += slice_hdr_size - 1;
45ce880a
                 // grow the packet when fewer than two maximum-size slices
                 // of room remain
                 if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
                     uint8_t *start = pkt->data;
                     // Recompute new size according to max_slice_size
                     // and deduce delta
f0e51be8
                     int delta = 200 + (ctx->pictures_per_frame *
                                 ctx->slices_per_picture + 1) *
                                 max_slice_size - pkt_size;
45ce880a
 
                     delta = FFMAX(delta, 2 * max_slice_size);
                     // remember the larger bound for the following frames
                     ctx->frame_size_upper_bound += delta;
 
                     // report the undersized estimate only once per encoder
                     if (!ctx->warn) {
                         avpriv_request_sample(avctx,
                                               "Packet too small: is %i,"
                                               " needs %i (slice: %i). "
                                               "Correct allocation",
                                               pkt_size, delta, max_slice_size);
                         ctx->warn = 1;
                     }
 
                     ret = av_grow_packet(pkt, delta);
                     if (ret < 0)
                         return ret;
 
                     pkt_size += delta;
                     // restore pointers: rebase every cursor from the old
                     // data pointer onto the current pkt->data
                     orig_buf         = pkt->data + (orig_buf         - start);
                     buf              = pkt->data + (buf              - start);
                     picture_size_pos = pkt->data + (picture_size_pos - start);
                     slice_sizes      = pkt->data + (slice_sizes      - start);
                     slice_hdr        = pkt->data + (slice_hdr        - start);
                     tmp              = pkt->data + (tmp              - start);
                 }
50833c9f
                 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)));
58b68e4f
                 ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
                                    mbs_per_slice);
52b81ff4
                 if (ret < 0)
                     return ret;
c8e186fa
 
                 // complete the slice header; the size of the last plane is
                 // not stored, it only counts towards the total slice size
                 bytestream_put_byte(&slice_hdr, q);
                 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
                 for (i = 0; i < ctx->num_planes - 1; i++) {
                     bytestream_put_be16(&slice_hdr, sizes[i]);
                     slice_size += sizes[i];
                 }
                 // seek table entry for this slice
                 bytestream_put_be16(&slice_sizes, slice_size);
                 buf += slice_size - slice_hdr_size;
45ce880a
                 if (max_slice_size < slice_size)
                     max_slice_size = slice_size;
1a265f61
             }
         }
c8e186fa
 
         // picture size is counted from the first byte of the picture header
         picture_size = buf - (picture_size_pos - 1);
         bytestream_put_be32(&picture_size_pos, picture_size);
1a265f61
     }
 
     // step back over the frame size field and the FRAME_ID tag
     orig_buf -= 8;
     frame_size = buf - orig_buf;
     bytestream_put_be32(&orig_buf, frame_size);
c8e186fa
 
1a265f61
     pkt->size   = frame_size;
     pkt->flags |= AV_PKT_FLAG_KEY;
     *got_packet = 1;
 
     return 0;
 }
 
 /**
  * Release everything encode_init() allocated.
  *
  * Safe to call on a partially initialised context: the per-thread node
  * arrays are only visited when the thread data array itself exists.
  *
  * @return always 0
  */
 static av_cold int encode_close(AVCodecContext *avctx)
 {
     ProresContext *ctx = avctx->priv_data;
 
     if (ctx->tdata) {
         int remaining = avctx->thread_count;
 
         // free the trellis node array owned by each thread slot
         while (remaining-- > 0)
             av_freep(&ctx->tdata[remaining].nodes);
     }
 
     av_freep(&ctx->tdata);
     av_freep(&ctx->slice_q);
 
     return 0;
 }
 
a9aee08d
 /**
  * Copy an 8x8 block of 16-bit samples into block and run the forward DCT
  * of the given DSP context on it in place.
  *
  * @param fdsp     DSP context providing the fdct() implementation
  * @param src      top-left sample of the source block
  * @param linesize distance between source rows in bytes
  * @param block    destination for the 64 coefficients, row-major
  */
 static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
                         ptrdiff_t linesize, int16_t *block)
 {
     // linesize is in bytes, samples are two bytes wide
     const ptrdiff_t stride = linesize >> 1;
     const uint16_t *row    = src;
     int16_t *out           = block;
     int r, c;
 
     for (r = 0; r < 8; r++, row += stride, out += 8) {
         for (c = 0; c < 8; c++)
             out[c] = row[c];
     }
 
     fdsp->fdct(block);
 }
 
1a265f61
 /**
  * Validate the user options and set up the encoder state.
  *
  * Steps, in order:
  *   - select the DCT wrapper and the scan table (interlaced or progressive)
  *   - check that mbs_per_slice is a power of two
  *   - resolve the "auto" profile from the pixel format
  *   - settle alpha_bits (forced to 0 without an alpha plane or outside the
  *     4444 profile)
  *   - derive macroblock and slice geometry
  *   - pick the quantisation matrix
  *   - either prepare rate-controlled mode (quantiser tables, slice_q and
  *     the per-thread trellis nodes) or fixed-quantiser mode, which is
  *     selected by a non-zero global_quality / FF_QP2LAMBDA
  *   - compute the packet size upper bound used by encode_frame()
  *
  * A negative quantiser derived from global_quality is rejected: it would
  * otherwise pass the "maximum is 64" check and produce a negative
  * quantiser table.
  *
  * @return 0 on success, AVERROR(EINVAL) or AVERROR_INVALIDDATA for rejected
  *         options, AVERROR(ENOMEM) on allocation failure
  */
 static av_cold int encode_init(AVCodecContext *avctx)
 {
     ProresContext *ctx = avctx->priv_data;
     int mps;
     int i, j;
     int min_quant, max_quant;
     int interlaced = !!(avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT);
 
     avctx->bits_per_raw_sample = 10;
 #if FF_API_CODED_FRAME
 FF_DISABLE_DEPRECATION_WARNINGS
     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
     avctx->coded_frame->key_frame = 1;
 FF_ENABLE_DEPRECATION_WARNINGS
 #endif
 
     ctx->fdct      = prores_fdct;
     ctx->scantable = interlaced ? ff_prores_interlaced_scan
                                 : ff_prores_progressive_scan;
     ff_fdctdsp_init(&ctx->fdsp, avctx);
 
     // a power of two has exactly one bit set
     mps = ctx->mbs_per_slice;
     if (mps & (mps - 1)) {
         av_log(avctx, AV_LOG_ERROR,
                "there should be an integer power of two MBs per slice\n");
         return AVERROR(EINVAL);
     }
     if (ctx->profile == PRORES_PROFILE_AUTO) {
         // alpha or non-subsampled chroma selects 4444, anything else HQ
         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
         ctx->profile = (desc->flags & AV_PIX_FMT_FLAG_ALPHA ||
                         !(desc->log2_chroma_w + desc->log2_chroma_h))
                      ? PRORES_PROFILE_4444 : PRORES_PROFILE_HQ;
         av_log(avctx, AV_LOG_INFO, "Autoselected %s. It can be overridden "
                "through -profile option.\n", ctx->profile == PRORES_PROFILE_4444
                ? "4:4:4:4 profile because of the used input colorspace"
                : "HQ profile to keep best quality");
     }
     if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
         if (ctx->profile != PRORES_PROFILE_4444) {
             // force alpha and warn
             av_log(avctx, AV_LOG_WARNING, "Profile selected will not "
                    "encode alpha. Override with -profile if needed.\n");
             ctx->alpha_bits = 0;
         }
         if (ctx->alpha_bits & 7) {
             av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
             return AVERROR(EINVAL);
         }
         avctx->bits_per_coded_sample = 32;
     } else {
         ctx->alpha_bits = 0;
     }
 
     ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
                          ? CFACTOR_Y422
                          : CFACTOR_Y444;
     ctx->profile_info  = prores_profile_info + ctx->profile;
     ctx->num_planes    = 3 + !!ctx->alpha_bits;
 
     ctx->mb_width      = FFALIGN(avctx->width,  16) >> 4;
 
     // an interlaced picture is one field, so it covers half the frame rows
     if (interlaced)
         ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
     else
         ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
 
     // full-width slices plus one narrower slice for every set bit of the
     // leftover macroblock count
     ctx->slices_width  = ctx->mb_width / mps;
     ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
     ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
     ctx->pictures_per_frame = 1 + interlaced;
 
     // -1 means "use the matrix that belongs to the profile"
     if (ctx->quant_sel == -1)
         ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
     else
         ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
 
     if (strlen(ctx->vendor) != 4) {
         av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
         return AVERROR_INVALIDDATA;
     }
 
     ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
     if (ctx->force_quant < 0) {
         // a negative value would slip past the upper-limit check below and
         // yield a negative quantiser table
         av_log(avctx, AV_LOG_ERROR, "quantiser must not be negative\n");
         return AVERROR_INVALIDDATA;
     }
     if (!ctx->force_quant) {
         if (!ctx->bits_per_mb) {
             // pick the bit budget of the first size class the frame fits in
             for (i = 0; i < NUM_MB_LIMITS - 1; i++)
                 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
                                            ctx->pictures_per_frame)
                     break;
             ctx->bits_per_mb   = ctx->profile_info->br_tab[i];
         } else if (ctx->bits_per_mb < 128) {
             av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
             return AVERROR_INVALIDDATA;
         }
 
         min_quant = ctx->profile_info->min_quant;
         max_quant = ctx->profile_info->max_quant;
         // precomputed scaled matrices for every stored quantiser
         for (i = min_quant; i < MAX_STORED_Q; i++) {
             for (j = 0; j < 64; j++)
                 ctx->quants[i][j] = ctx->quant_mat[j] * i;
         }
 
         ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
         if (!ctx->slice_q) {
             encode_close(avctx);
             return AVERROR(ENOMEM);
         }
 
         // zeroed so that encode_close() can run on a partially filled array
         ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
         if (!ctx->tdata) {
             encode_close(avctx);
             return AVERROR(ENOMEM);
         }
 
         for (j = 0; j < avctx->thread_count; j++) {
             // one trellis column per slice plus the initial column
             ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
                                             * TRELLIS_WIDTH
                                             * sizeof(*ctx->tdata->nodes));
             if (!ctx->tdata[j].nodes) {
                 encode_close(avctx);
                 return AVERROR(ENOMEM);
             }
             // initial column: no predecessor, nothing spent yet
             for (i = min_quant; i < max_quant + 2; i++) {
                 ctx->tdata[j].nodes[i].prev_node = -1;
                 ctx->tdata[j].nodes[i].bits      = 0;
                 ctx->tdata[j].nodes[i].score     = 0;
             }
         }
     } else {
         int ls = 0;
 
         if (ctx->force_quant > 64) {
             av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
             return AVERROR_INVALIDDATA;
         }
 
         // fixed quantiser: a single table, plus a per-coefficient size
         // estimate that yields the bits-per-MB figure for the bound below
         for (j = 0; j < 64; j++) {
             ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
             ls += av_log2((1 << 11)  / ctx->quants[0][j]) * 2 + 1;
         }
 
         ctx->bits_per_mb = ls * 8;
         if (ctx->chroma_factor == CFACTOR_Y444)
             ctx->bits_per_mb += ls * 4;
     }
 
     ctx->frame_size_upper_bound = (ctx->pictures_per_frame *
                                    ctx->slices_per_picture + 1) *
                                   (2 + 2 * ctx->num_planes +
                                    (mps * ctx->bits_per_mb) / 8)
                                   + 200;
 
     if (ctx->alpha_bits) {
          // The alpha plane is run-coded and might exceed the bit budget.
          ctx->frame_size_upper_bound += (ctx->pictures_per_frame *
                                          ctx->slices_per_picture + 1) *
          /* num pixels per slice */     ((ctx->mbs_per_slice * 256 *
          /* bits per pixel */            (1 + ctx->alpha_bits + 1) + 7) >> 3);
     }
 
     avctx->codec_tag   = ctx->profile_info->tag;
 
     av_log(avctx, AV_LOG_DEBUG,
            "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
            ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
            interlaced ? "yes" : "no", ctx->bits_per_mb);
     av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
            ctx->frame_size_upper_bound);
 
     return 0;
 }
 
 // byte offset of an option's backing field inside ProresContext
 #define OFFSET(x) offsetof(ProresContext, x)
 // every option below is a video encoding parameter
 #define VE     AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 
 // Private encoder options. Entries of type AV_OPT_TYPE_CONST are named
 // values for the option whose unit string ("profile" / "quant_mat") they
 // share.
 static const AVOption options[] = {
     // must be a power of two; encode_init() rejects anything else
     { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
e6153f17
         AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1a265f61
     // "auto" lets encode_init() choose between HQ and 4444 from the pixel format
     { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
a8087336
         { .i64 = PRORES_PROFILE_AUTO },
         PRORES_PROFILE_AUTO, PRORES_PROFILE_4444, VE, "profile" },
     { "auto",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_AUTO },
         0, 0, VE, "profile" },
124134e4
     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1a265f61
         0, 0, VE, "profile" },
124134e4
     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1a265f61
         0, 0, VE, "profile" },
124134e4
     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1a265f61
         0, 0, VE, "profile" },
124134e4
     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1a265f61
         0, 0, VE, "profile" },
0b0953ba
     { "4444",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
         0, 0, VE, "profile" },
4db4b53d
     // written into the frame header; must be exactly four characters long
     { "vendor", "vendor ID", OFFSET(vendor),
         AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
     // 0 takes the budget from the profile table; other values below 128
     // are rejected by encode_init()
     { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
e6153f17
         AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
4db4b53d
     // -1 ("auto") uses the matrix that belongs to the selected profile
     { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
e6153f17
         { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
124134e4
     { "auto",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
4db4b53d
         0, 0, VE, "quant_mat" },
124134e4
     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
4db4b53d
         0, 0, VE, "quant_mat" },
124134e4
     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
4db4b53d
         0, 0, VE, "quant_mat" },
124134e4
     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
4db4b53d
         0, 0, VE, "quant_mat" },
124134e4
     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
4db4b53d
         0, 0, VE, "quant_mat" },
124134e4
     { "default",       NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
4db4b53d
         0, 0, VE, "quant_mat" },
0b0953ba
     // for inputs with an alpha plane encode_init() accepts only multiples
     // of 8, i.e. 0, 8 or 16
     { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
         { .i64 = 16 }, 0, 16, VE },
1a265f61
     { NULL }
 };
 
 // AVClass that attaches the private option table above to the encoder's
 // private context.
 static const AVClass proresenc_class = {
     .class_name = "ProRes encoder",
     .item_name  = av_default_item_name,
     .option     = options,
     .version    = LIBAVUTIL_VERSION_INT,
 };
 
b3c39962
 // Codec descriptor for the "prores_ks" encoder: wires up the init, encode
 // and close callbacks, advertises slice threading and lists the accepted
 // input pixel formats.
 AVCodec ff_prores_ks_encoder = {
     .name           = "prores_ks",
b2bed932
     .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1a265f61
     .type           = AVMEDIA_TYPE_VIDEO,
36ef5369
     .id             = AV_CODEC_ID_PRORES,
1a265f61
     .priv_data_size = sizeof(ProresContext),
     .init           = encode_init,
     .close          = encode_close,
     .encode2        = encode_frame,
def97856
     // find_quant_thread() is run through avctx->execute2()
     .capabilities   = AV_CODEC_CAP_SLICE_THREADS,
716d413c
     // list terminated by AV_PIX_FMT_NONE
     .pix_fmts       = (const enum AVPixelFormat[]) {
0b0953ba
                           AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
                           AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1a265f61
                       },
     .priv_class     = &proresenc_class,
 };