GitList

Browse code

prores: extract idct into its own dspcontext and merge with put_pixels.

Ronald S. Bultje authored on 2011/10/07 00:03:38
Showing 8 changed files

libavcodec/Makefile index 3c4e2f8..b7b5124 100644
libavcodec/dsputil.c index 967406e..182063c 100644
libavcodec/dsputil.h index 73830f8..bef2cdd 100644
libavcodec/proresdec.c index c70d145..cbbd068 100644
libavcodec/proresdsp.c index 0000000..3038176
libavcodec/proresdsp.h index 0000000..18d6bf5
libavcodec/simple_idct.c index b62658b..c6cd908 100644
libavcodec/simple_idct.h index a33eb96..6e22158 100644

@@ -295,7 +295,7 @@ OBJS-$(CONFIG_PNG_DECODER)             += png.o pngdec.o
                      OBJS-$(CONFIG_PNG_ENCODER)             += png.o pngenc.o
                      OBJS-$(CONFIG_PPM_DECODER)             += pnmdec.o pnm.o
                      OBJS-$(CONFIG_PPM_ENCODER)             += pnmenc.o pnm.o
                     -OBJS-$(CONFIG_PRORES_DECODER)          += proresdec.o
                     +OBJS-$(CONFIG_PRORES_DECODER)          += proresdec.o proresdsp.o
                      OBJS-$(CONFIG_PTX_DECODER)             += ptx.o
                      OBJS-$(CONFIG_QCELP_DECODER)           += qcelpdec.o celp_math.o         \
                                                                celp_filters.o acelp_vectors.o \

libavcodec/dsputil.c

History View file @ 92fb52d

@@ -145,6 +145,41 @@ void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_s
+                         }
+                     }
                     +void ff_init_scantable_permutation(uint8_t *idct_permutation,
                     +                                   int idct_permutation_type)
                     +{   /* Fill the 64-entry table idct_permutation according to
                     +     * idct_permutation_type, which is matched against the
                     +     * FF_*_IDCT_PERM cases below. For every index i in 0..63 the
                     +     * table receives the permuted index. An unrecognized type only
                     +     * logs an error and the table is not written. */
                     +    int i;
+                    +
                     +    switch(idct_permutation_type){
                     +    case FF_NO_IDCT_PERM: /* identity: entry i maps to i */
                     +        for(i=0; i<64; i++)
                     +            idct_permutation[i]= i;
                     +        break;
                     +    case FF_LIBMPEG2_IDCT_PERM: /* bits 3-5 kept; bits 1-2 move to 0-1, bit 0 moves to bit 2 */
                     +        for(i=0; i<64; i++)
                     +            idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
                     +        break;
                     +    case FF_SIMPLE_IDCT_PERM: /* copied verbatim from the simple_mmx_permutation table */
                     +        for(i=0; i<64; i++)
                     +            idct_permutation[i]= simple_mmx_permutation[i];
                     +        break;
                     +    case FF_TRANSPOSE_IDCT_PERM: /* low 3 bits and high 3 bits of i swap places */
                     +        for(i=0; i<64; i++)
                     +            idct_permutation[i]= ((i&7)<<3) | (i>>3);
                     +        break;
                     +    case FF_PARTTRANS_IDCT_PERM: /* bits 2 and 5 kept; bits 0-1 and bits 3-4 swap places */
                     +        for(i=0; i<64; i++)
                     +            idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
                     +        break;
                     +    case FF_SSE2_IDCT_PERM: /* bits 3-5 kept; low 3 bits looked up in idct_sse2_row_perm */
                     +        for(i=0; i<64; i++)
                     +            idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
                     +        break;
                     +    default: /* no codec context is available here, so the log call gets NULL */
                     +        av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
                     +    }
                     +}
+                    +
                      static int pix_sum_c(uint8_t * pix, int line_size)
+                     {
                          int s, i, j;
@@ -3123,32 +3158,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
                                  c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
+                         }
                     -    switch(c->idct_permutation_type){
                     -    case FF_NO_IDCT_PERM:
                     -        for(i=0; i<64; i++)
                     -            c->idct_permutation[i]= i;
                     -        break;
                     -    case FF_LIBMPEG2_IDCT_PERM:
                     -        for(i=0; i<64; i++)
                     -            c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
                     -        break;
                     -    case FF_SIMPLE_IDCT_PERM:
                     -        for(i=0; i<64; i++)
                     -            c->idct_permutation[i]= simple_mmx_permutation[i];
                     -        break;
                     -    case FF_TRANSPOSE_IDCT_PERM:
                     -        for(i=0; i<64; i++)
                     -            c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
                     -        break;
                     -    case FF_PARTTRANS_IDCT_PERM:
                     -        for(i=0; i<64; i++)
                     -            c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
                     -        break;
                     -    case FF_SSE2_IDCT_PERM:
                     -        for(i=0; i<64; i++)
                     -            c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
                     -        break;
                     -    default:
                     -        av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
                     -    }
                     +    ff_init_scantable_permutation(c->idct_permutation,
                     +                                  c->idct_permutation_type);
+                     }

libavcodec/dsputil.h

History View file @ 92fb52d

@@ -202,6 +202,8 @@ typedef struct ScanTable{
                      } ScanTable;
                      void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);
                     +void ff_init_scantable_permutation(uint8_t *idct_permutation,
                     +                                   int idct_permutation_type);
                      #define EMULATED_EDGE(depth) \
                      void ff_emulated_edge_mc_ ## depth (uint8_t *buf, const uint8_t *src, int linesize,\

libavcodec/proresdec.c

History View file @ 92fb52d

@@ -34,17 +34,11 @@
                      #include "libavutil/intmath.h"
                      #include "avcodec.h"
                     -#include "dsputil.h"
                     +#include "proresdsp.h"
                      #include "get_bits.h"
                     -#define BITS_PER_SAMPLE 10                              ///< output precision of that decoder
                     -#define BIAS     (1 << (BITS_PER_SAMPLE - 1))           ///< bias value for converting signed pixels into unsigned ones
                     -#define CLIP_MIN (1 << (BITS_PER_SAMPLE - 8))           ///< minimum value for clipping resulting pixels
                     -#define CLIP_MAX (1 << BITS_PER_SAMPLE) - CLIP_MIN - 1  ///< maximum value for clipping resulting pixels
+                    -
+                    -
                      typedef struct {
                     -    DSPContext dsp;
                     +    ProresDSPContext dsp;
                          AVFrame    picture;
                          ScanTable  scantable;
                          int        scantable_type;           ///< -1 = uninitialized, 0 = progressive, 1/2 = interlaced
@@ -104,8 +98,8 @@ static av_cold int decode_init(AVCodecContext *avctx)
                          avctx->pix_fmt = PIX_FMT_YUV422P10; // set default pixel format
                     -    avctx->bits_per_raw_sample = BITS_PER_SAMPLE;
                     -    dsputil_init(&ctx->dsp, avctx);
                     +    avctx->bits_per_raw_sample = PRORES_BITS_PER_SAMPLE;
                     +    ff_proresdsp_init(&ctx->dsp);
                          avctx->coded_frame = &ctx->picture;
                          avcodec_get_frame_defaults(&ctx->picture);
@@ -449,48 +443,6 @@ static inline void decode_ac_coeffs(GetBitContext *gb, DCTELEM *out,
+                     }
                     -#define CLIP_AND_BIAS(x) (av_clip((x) + BIAS, CLIP_MIN, CLIP_MAX))
+                    -
                     -/**
                     - * Add bias value, clamp and output pixels of a slice
                     - */
                     -static void put_pixels(const DCTELEM *in, uint16_t *out, int stride,
                     -                       int mbs_per_slice, int blocks_per_mb)
                     -{
                     -    int mb, x, y, src_offset, dst_offset;
                     -    const DCTELEM *src1, *src2;
                     -    uint16_t *dst1, *dst2;
+                    -
                     -    src1 = in;
                     -    src2 = in + (blocks_per_mb << 5);
                     -    dst1 = out;
                     -    dst2 = out + (stride << 3);
+                    -
                     -    for (mb = 0; mb < mbs_per_slice; mb++) {
                     -        for (y = 0, dst_offset = 0; y < 8; y++, dst_offset += stride) {
                     -            for (x = 0; x < 8; x++) {
                     -                src_offset = (y << 3) + x;
+                    -
                     -                dst1[dst_offset + x] = CLIP_AND_BIAS(src1[src_offset]);
                     -                dst2[dst_offset + x] = CLIP_AND_BIAS(src2[src_offset]);
+                    -
                     -                if (blocks_per_mb > 2) {
                     -                    dst1[dst_offset + x + 8] =
                     -                        CLIP_AND_BIAS(src1[src_offset + 64]);
                     -                    dst2[dst_offset + x + 8] =
                     -                        CLIP_AND_BIAS(src2[src_offset + 64]);
                     -                }
                     -            }
                     -        }
+                    -
                     -        src1 += blocks_per_mb << 6;
                     -        src2 += blocks_per_mb << 6;
                     -        dst1 += blocks_per_mb << 2;
                     -        dst2 += blocks_per_mb << 2;
                     -    }
                     -}
+                    -
+                    -
                      /**
                       * Decode a slice plane (luma or chroma).
                       */
@@ -502,7 +454,7 @@ static void decode_slice_plane(ProresContext *ctx, const uint8_t *buf,
+                     {
                          GetBitContext gb;
                          DCTELEM *block_ptr;
                     -    int i, blk_num, blocks_per_slice;
                     +    int mb_num, blocks_per_slice;
                          blocks_per_slice = mbs_per_slice * blocks_per_mb;
@@ -518,20 +470,20 @@ static void decode_slice_plane(ProresContext *ctx, const uint8_t *buf,
                          /* inverse quantization, inverse transform and output */
                          block_ptr = ctx->blocks;
                     -    for (blk_num = 0; blk_num < blocks_per_slice; blk_num++, block_ptr += 64) {
                     -        /* TODO: the correct solution shoud be (block_ptr[i] * qmat[i]) >> 1
                     -         * and the input of the inverse transform should be scaled by 2
                     -         * in order to avoid rounding errors.
                     -         * Due to the fact the existing Libav transforms are incompatible with
                     -         * that input I temporally introduced the coarse solution below... */
                     -        for (i = 0; i < 64; i++)
                     -            block_ptr[i] = (block_ptr[i] * qmat[i]) >> 2;
+                    -
                     -        ctx->dsp.idct(block_ptr);
                     +    for (mb_num = 0; mb_num < mbs_per_slice; mb_num++, out_ptr += blocks_per_mb * 4) {
                     +        ctx->dsp.idct_put(out_ptr,                    linesize, block_ptr, qmat);
                     +        block_ptr += 64;
                     +        if (blocks_per_mb > 2) {
                     +            ctx->dsp.idct_put(out_ptr + 8,            linesize, block_ptr, qmat);
                     +            block_ptr += 64;
                     +        }
                     +        ctx->dsp.idct_put(out_ptr + linesize * 4,     linesize, block_ptr, qmat);
                     +        block_ptr += 64;
                     +        if (blocks_per_mb > 2) {
                     +            ctx->dsp.idct_put(out_ptr + linesize * 4 + 8, linesize, block_ptr, qmat);
                     +            block_ptr += 64;
                     +        }
+                         }
+                    -
                     -    put_pixels(ctx->blocks, out_ptr, linesize >> 1, mbs_per_slice,
                     -               blocks_per_mb);
+                     }

libavcodec/proresdsp.c

History View file @ 92fb52d

                     new file mode 100644
@@ -0,0 +1,61 @@
                     +/*
                     + * Apple ProRes compatible decoder
                     + *
                     + * Copyright (c) 2010-2011 Maxim Poliakovski
                     + *
                     + * This file is part of Libav.
                     + *
                     + * Libav is free software; you can redistribute it and/or
                     + * modify it under the terms of the GNU Lesser General Public
                     + * License as published by the Free Software Foundation; either
                     + * version 2.1 of the License, or (at your option) any later version.
                     + *
                     + * Libav is distributed in the hope that it will be useful,
                     + * but WITHOUT ANY WARRANTY; without even the implied warranty of
                     + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     + * Lesser General Public License for more details.
                     + *
                     + * You should have received a copy of the GNU Lesser General Public
                     + * License along with Libav; if not, write to the Free Software
                     + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                     + */
+                    +
                     +#include "proresdsp.h"
                     +#include "simple_idct.h"
+                    +
                     +#define BIAS     (1 << (PRORES_BITS_PER_SAMPLE - 1))           ///< bias value for converting signed pixels into unsigned ones
                     +#define CLIP_MIN (1 << (PRORES_BITS_PER_SAMPLE - 8))           ///< minimum value for clipping resulting pixels
                     +#define CLIP_MAX ((1 << PRORES_BITS_PER_SAMPLE) - CLIP_MIN - 1) ///< maximum value for clipping resulting pixels (whole expansion parenthesized so it is safe inside any expression)
+                    +
                     +#define CLIP_AND_BIAS(x) (av_clip((x) + BIAS, CLIP_MIN, CLIP_MAX))
+                    +
                     +/**
                     + * Add the bias value to one 8x8 block of samples, clamp the result
                     + * and store it as 16-bit pixels.
                     + *
                     + * @param dst    top-left output pixel of the block
                     + * @param stride distance between two output rows, counted in
                     + *               uint16_t elements (it is added to the index into dst)
                     + * @param in     64 input samples, read row by row (8 samples per row)
                     + */
                     +static void put_pixels(uint16_t *dst, int stride, const DCTELEM *in)
                     +{
                     +    int x, y, src_offset, dst_offset;
+                    +
                     +    for (y = 0, dst_offset = 0; y < 8; y++, dst_offset += stride) {
                     +        for (x = 0; x < 8; x++) {
                     +            src_offset = (y << 3) + x;
+                    +
                     +            dst[dst_offset + x] = CLIP_AND_BIAS(in[src_offset]);
                     +        }
                     +    }
                     +}
+                    +
                     +static void prores_idct_put_c(uint16_t *out, int linesize, DCTELEM *block, const int16_t *qmat)
                     +{   /* Runs ff_prores_idct() on block with qmat, then writes the
                     +     * resulting 8x8 block to out through put_pixels(). */
                     +    ff_prores_idct(block, qmat);
                     +    put_pixels(out, linesize >> 1, block); /* linesize is halved to get the stride in uint16_t elements -- presumably linesize is in bytes; confirm against callers */
                     +}
+                    +
                     +void ff_proresdsp_init(ProresDSPContext *dsp)
                     +{   /* Initialize *dsp: select the C idct_put routine, set the
                     +     * permutation type to FF_NO_IDCT_PERM and build the permutation
                     +     * table for that type. */
                     +    dsp->idct_put = prores_idct_put_c;
                     +    dsp->idct_permutation_type = FF_NO_IDCT_PERM;
+                    +
                     +    ff_init_scantable_permutation(dsp->idct_permutation,
                     +                                  dsp->idct_permutation_type);
                     +}

libavcodec/proresdsp.h

History View file @ 92fb52d

                     new file mode 100644
@@ -0,0 +1,38 @@
                     +/*
                     + * Apple ProRes compatible decoder
                     + *
                     + * Copyright (c) 2010-2011 Maxim Poliakovski
                     + *
                     + * This file is part of Libav.
                     + *
                     + * Libav is free software; you can redistribute it and/or
                     + * modify it under the terms of the GNU Lesser General Public
                     + * License as published by the Free Software Foundation; either
                     + * version 2.1 of the License, or (at your option) any later version.
                     + *
                     + * Libav is distributed in the hope that it will be useful,
                     + * but WITHOUT ANY WARRANTY; without even the implied warranty of
                     + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     + * Lesser General Public License for more details.
                     + *
                     + * You should have received a copy of the GNU Lesser General Public
                     + * License along with Libav; if not, write to the Free Software
                     + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                     + */
+                    +
                     +#ifndef AVCODEC_PRORESDSP_H
                     +#define AVCODEC_PRORESDSP_H
+                    +
                     +#include "dsputil.h"
+                    +
                     +#define PRORES_BITS_PER_SAMPLE 10 ///< output precision of prores decoder
+                    +
                     +typedef struct {
                     +    int idct_permutation_type; ///< FF_*_IDCT_PERM value passed to ff_init_scantable_permutation()
                     +    uint8_t idct_permutation[64]; ///< permutation table filled by ff_init_scantable_permutation()
                     +    void (* idct_put) (uint16_t *out, int linesize, DCTELEM *block, const int16_t *qmat); ///< combined routine called once per 8x8 block: takes the coefficients in block plus qmat and writes pixels to out
                     +} ProresDSPContext;
+                    +
                     +void ff_proresdsp_init(ProresDSPContext *dsp);
+                    +
                     +#endif /* AVCODEC_PRORESDSP_H */

libavcodec/simple_idct.c

History View file @ 92fb52d

@@ -221,3 +221,20 @@ void ff_simple_idct44_add(uint8_t *dest, int line_size, DCTELEM *block)
                              idct4col_add(dest + i, line_size, block + i);
+                         }
+                     }
+                    +
                     +void ff_prores_idct(DCTELEM *block, const int16_t *qmat)
                     +{   /* Dequantize the 64 coefficients in block with qmat, then run
                     +     * the row helper, a right shift by 2 and the column helper.
                     +     * Everything is done in place: block is overwritten. */
                     +    int i;
+                    +
                     +    for (i = 0; i < 64; i++)
                     +        block[i] *= qmat[i]; /* dequantization: element-wise multiply */
+                    +
                     +    for (i = 0; i < 8; i++)
                     +        idctRowCondDC_10(block + i*8); /* one call per group of 8 consecutive coefficients */
+                    +
                     +    for (i = 0; i < 64; i++)
                     +        block[i] >>= 2; /* extra downscale applied between the two passes */
+                    +
                     +    for (i = 0; i < 8; i++)
                     +        idctSparseCol_10(block + i); /* one call per starting offset 0..7 */
                     +}

libavcodec/simple_idct.h

History View file @ 92fb52d

@@ -38,6 +38,12 @@ void ff_simple_idct_8(DCTELEM *block);
                      void ff_simple_idct_put_10(uint8_t *dest, int line_size, DCTELEM *block);
                      void ff_simple_idct_add_10(uint8_t *dest, int line_size, DCTELEM *block);
                      void ff_simple_idct_10(DCTELEM *block);
                     +/**
                     + * Special version of ff_simple_idct_10() which also dequantizes the
                     + * block with qmat and shifts the values right by 2 bits between the
                     + * two IDCT passes to account for the larger scale of the input
                     + * coefficients. The block is transformed in place.
                     + */
                     +void ff_prores_idct(DCTELEM *block, const int16_t *qmat);
                      void ff_simple_idct_mmx(int16_t *block);
                      void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, int16_t *block);