GitList

Browse code

Merge commit '57f09608e1600d1cf1679885a46f5004d522d68f'

* commit '57f09608e1600d1cf1679885a46f5004d522d68f':
dsputil: Move thirdpel-related bits into their own context

Conflicts:
libavcodec/svq3.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>

Michael Niedermayer authored on 2014/03/23 07:26:23
Showing 12 changed files

configure index 5ce2961..778a2d8 100755
doc/optimization.txt index 9357545..cce472f 100644
libavcodec/Makefile index e243420..8c4d292 100644
libavcodec/dsputil.c index eeeda7c..861aa4c 100644
libavcodec/dsputil.h index 5255625..0730a78 100644
libavcodec/h264qpel_template.c index 37c48a9..cb421b5 100644
libavcodec/hpel_template.c index 65bbd9b..6695a65 100644
libavcodec/hpeldsp_template.c index 2dc5ddc..983ff0e 100644
libavcodec/svq3.c index 01a2e6a..bf8c688 100644
libavcodec/tpel_template.c index 0000000..6e25025
libavcodec/tpeldsp.c index 0000000..b5af72c
libavcodec/tpeldsp.h index 0000000..3732f17

configure

History View file @ 9d6a27d

@@ -1780,6 +1780,7 @@ CONFIG_EXTRA="
                          rtpdec
                          rtpenc_chain
                          sinewin
                     +    tpeldsp
                          videodsp
                          vp3dsp
                      "
@@ -2090,7 +2091,7 @@ sonic_ls_encoder_select="golomb"
                      sp5x_decoder_select="mjpeg_decoder"
                      svq1_decoder_select="hpeldsp"
                      svq1_encoder_select="aandcttables dsputil hpeldsp mpegvideoenc"
                     -svq3_decoder_select="h264_decoder hpeldsp"
                     +svq3_decoder_select="h264_decoder hpeldsp tpeldsp"
                      svq3_decoder_suggest="zlib"
                      tak_decoder_select="dsputil"
                      theora_decoder_select="vp3_decoder"

doc/optimization.txt

History View file @ 9d6a27d

@@ -79,9 +79,6 @@ qpel{8,16}_mc??_old_c / *pixels{8,16}_l4
                          Just used to work around a bug in an old libavcodec encoder version.
                          Don't optimize them.
                     -tpel_mc_func {put,avg}_tpel_pixels_tab
                     -    Used only for SVQ3, so only optimize them if you need fast SVQ3 decoding.
                     -
                      add_bytes/diff_bytes
                          For huffyuv only, optimize if you want a faster ffhuffyuv codec.

libavcodec/Makefile

History View file @ 9d6a27d

@@ -76,6 +76,7 @@ RDFT-OBJS-$(CONFIG_HARDCODED_TABLES)   += sin_tables.o
                      OBJS-$(CONFIG_RDFT)                    += rdft.o $(RDFT-OBJS-yes)
                      OBJS-$(CONFIG_SHARED)                  += log2_tab.o
                      OBJS-$(CONFIG_SINEWIN)                 += sinewin.o
                     +OBJS-$(CONFIG_TPELDSP)                 += tpeldsp.o
                      OBJS-$(CONFIG_VAAPI)                   += vaapi.o
                      OBJS-$(CONFIG_VDPAU)                   += vdpau.o
                      OBJS-$(CONFIG_VIDEODSP)                += videodsp.o

libavcodec/dsputil.c

History View file @ 9d6a27d

@@ -50,6 +50,7 @@ uint32_t ff_square_tab[512] = { 0, };
                      #undef BIT_DEPTH
                      #define BIT_DEPTH 8
                     +#include "tpel_template.c"
                      #include "dsputil_template.c"
                      // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
@@ -604,284 +605,6 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
                          }
                      }
                     -static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
                     -                                          int stride, int width, int height)
                     -{
                     -    switch (width) {
                     -    case 2:
                     -        put_pixels2_8_c(dst, src, stride, height);
                     -        break;
                     -    case 4:
                     -        put_pixels4_8_c(dst, src, stride, height);
                     -        break;
                     -    case 8:
                     -        put_pixels8_8_c(dst, src, stride, height);
                     -        break;
                     -    case 16:
                     -        put_pixels16_8_c(dst, src, stride, height);
                     -        break;
                     -    }
                     -}
+                    -
                     -static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
                     -                                          int stride, int width, int height)
                     -{
                     -    int i, j;
+                    -
                     -    for (i = 0; i < height; i++) {
                     -        for (j = 0; j < width; j++)
                     -            dst[j] = ((2 * src[j] + src[j + 1] + 1) *
                     -                      683) >> 11;
                     -        src += stride;
                     -        dst += stride;
                     -    }
                     -}
+                    -
                     -static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
                     -                                          int stride, int width, int height)
                     -{
                     -    int i, j;
+                    -
                     -    for (i = 0; i < height; i++) {
                     -        for (j = 0; j < width; j++)
                     -            dst[j] = ((src[j] + 2 * src[j + 1] + 1) *
                     -                      683) >> 11;
                     -        src += stride;
                     -        dst += stride;
                     -    }
                     -}
+                    -
                     -static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
                     -                                          int stride, int width, int height)
                     -{
                     -    int i, j;
+                    -
                     -    for (i = 0; i < height; i++) {
                     -        for (j = 0; j < width; j++)
                     -            dst[j] = ((2 * src[j] + src[j + stride] + 1) *
                     -                      683) >> 11;
                     -        src += stride;
                     -        dst += stride;
                     -    }
                     -}
+                    -
                     -static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
                     -                                          int stride, int width, int height)
                     -{
                     -    int i, j;
+                    -
                     -    for (i = 0; i < height; i++) {
                     -        for (j = 0; j < width; j++)
                     -            dst[j] = ((4 * src[j]          + 3 * src[j + 1] +
                     -                       3 * src[j + stride] + 2 * src[j + stride + 1] + 6) *
                     -                      2731) >> 15;
                     -        src += stride;
                     -        dst += stride;
                     -    }
                     -}
+                    -
                     -static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
                     -                                          int stride, int width, int height)
                     -{
                     -    int i, j;
+                    -
                     -    for (i = 0; i < height; i++) {
                     -        for (j = 0; j < width; j++)
                     -            dst[j] = ((3 * src[j]          + 2 * src[j + 1] +
                     -                       4 * src[j + stride] + 3 * src[j + stride + 1] + 6) *
                     -                      2731) >> 15;
                     -        src += stride;
                     -        dst += stride;
                     -    }
                     -}
+                    -
                     -static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
                     -                                          int stride, int width, int height)
                     -{
                     -    int i, j;
+                    -
                     -    for (i = 0; i < height; i++) {
                     -        for (j = 0; j < width; j++)
                     -            dst[j] = ((src[j] + 2 * src[j + stride] + 1) *
                     -                      683) >> 11;
                     -        src += stride;
                     -        dst += stride;
                     -    }
                     -}
+                    -
                     -static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
                     -                                          int stride, int width, int height)
                     -{
                     -    int i, j;
+                    -
                     -    for (i = 0; i < height; i++) {
                     -        for (j = 0; j < width; j++)
                     -            dst[j] = ((3 * src[j]          + 4 * src[j + 1] +
                     -                       2 * src[j + stride] + 3 * src[j + stride + 1] + 6) *
                     -                      2731) >> 15;
                     -        src += stride;
                     -        dst += stride;
                     -    }
                     -}
+                    -
                     -static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
                     -                                          int stride, int width, int height)
                     -{
                     -    int i, j;
+                    -
                     -    for (i = 0; i < height; i++) {
                     -        for (j = 0; j < width; j++)
                     -            dst[j] = ((2 * src[j]          + 3 * src[j + 1] +
                     -                       3 * src[j + stride] + 4 * src[j + stride + 1] + 6) *
                     -                      2731) >> 15;
                     -        src += stride;
                     -        dst += stride;
                     -    }
                     -}
+                    -
                     -static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
                     -                                          int stride, int width, int height)
                     -{
                     -    switch (width) {
                     -    case 2:
                     -        avg_pixels2_8_c(dst, src, stride, height);
                     -        break;
                     -    case 4:
                     -        avg_pixels4_8_c(dst, src, stride, height);
                     -        break;
                     -    case 8:
                     -        avg_pixels8_8_c(dst, src, stride, height);
                     -        break;
                     -    case 16:
                     -        avg_pixels16_8_c(dst, src, stride, height);
                     -        break;
                     -    }
                     -}
+                    -
                     -static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
                     -                                          int stride, int width, int height)
                     -{
                     -    int i, j;
+                    -
                     -    for (i = 0; i < height; i++) {
                     -        for (j = 0; j < width; j++)
                     -            dst[j] = (dst[j] +
                     -                      (((2 * src[j] + src[j + 1] + 1) *
                     -                        683) >> 11) + 1) >> 1;
                     -        src += stride;
                     -        dst += stride;
                     -    }
                     -}
+                    -
                     -static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
                     -                                          int stride, int width, int height)
                     -{
                     -    int i, j;
+                    -
                     -    for (i = 0; i < height; i++) {
                     -        for (j = 0; j < width; j++)
                     -            dst[j] = (dst[j] +
                     -                      (((src[j] + 2 * src[j + 1] + 1) *
                     -                        683) >> 11) + 1) >> 1;
                     -        src += stride;
                     -        dst += stride;
                     -    }
                     -}
+                    -
                     -static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
                     -                                          int stride, int width, int height)
                     -{
                     -    int i, j;
+                    -
                     -    for (i = 0; i < height; i++) {
                     -        for (j = 0; j < width; j++)
                     -            dst[j] = (dst[j] +
                     -                      (((2 * src[j] + src[j + stride] + 1) *
                     -                        683) >> 11) + 1) >> 1;
                     -        src += stride;
                     -        dst += stride;
                     -    }
                     -}
+                    -
                     -static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
                     -                                          int stride, int width, int height)
                     -{
                     -    int i, j;
+                    -
                     -    for (i = 0; i < height; i++) {
                     -        for (j = 0; j < width; j++)
                     -            dst[j] = (dst[j] +
                     -                      (((4 * src[j]          + 3 * src[j + 1] +
                     -                         3 * src[j + stride] + 2 * src[j + stride + 1] + 6) *
                     -                        2731) >> 15) + 1) >> 1;
                     -        src += stride;
                     -        dst += stride;
                     -    }
                     -}
+                    -
                     -static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
                     -                                          int stride, int width, int height)
                     -{
                     -    int i, j;
+                    -
                     -    for (i = 0; i < height; i++) {
                     -        for (j = 0; j < width; j++)
                     -            dst[j] = (dst[j] +
                     -                      (((3 * src[j]          + 2 * src[j + 1] +
                     -                         4 * src[j + stride] + 3 * src[j + stride + 1] + 6) *
                     -                        2731) >> 15) + 1) >> 1;
                     -        src += stride;
                     -        dst += stride;
                     -    }
                     -}
+                    -
                     -static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
                     -                                          int stride, int width, int height)
                     -{
                     -    int i, j;
+                    -
                     -    for (i = 0; i < height; i++) {
                     -        for (j = 0; j < width; j++)
                     -            dst[j] = (dst[j] +
                     -                      (((src[j] + 2 * src[j + stride] + 1) *
                     -                        683) >> 11) + 1) >> 1;
                     -        src += stride;
                     -        dst += stride;
                     -    }
                     -}
+                    -
                     -static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
                     -                                          int stride, int width, int height)
                     -{
                     -    int i, j;
+                    -
                     -    for (i = 0; i < height; i++) {
                     -        for (j = 0; j < width; j++)
                     -            dst[j] = (dst[j] +
                     -                      (((3 * src[j]          + 4 * src[j + 1] +
                     -                         2 * src[j + stride] + 3 * src[j + stride + 1] + 6) *
                     -                        2731) >> 15) + 1) >> 1;
                     -        src += stride;
                     -        dst += stride;
                     -    }
                     -}
+                    -
                     -static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
                     -                                          int stride, int width, int height)
                     -{
                     -    int i, j;
+                    -
                     -    for (i = 0; i < height; i++) {
                     -        for (j = 0; j < width; j++)
                     -            dst[j] = (dst[j] +
                     -                      (((2 * src[j]          + 3 * src[j + 1] +
                     -                         3 * src[j + stride] + 4 * src[j + stride + 1] + 6) *
                     -                        2731) >> 15) + 1) >> 1;
                     -        src += stride;
                     -        dst += stride;
                     -    }
                     -}
+                    -
                      #define QPEL_MC(r, OPNAME, RND, OP)                                           \
                      static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src,       \
                                                                  int dstStride, int srcStride,     \
@@ -2974,26 +2697,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
                          c->pix_abs[1][2] = pix_abs8_y2_c;
                          c->pix_abs[1][3] = pix_abs8_xy2_c;
                     -    c->put_tpel_pixels_tab[0]  = put_tpel_pixels_mc00_c;
                     -    c->put_tpel_pixels_tab[1]  = put_tpel_pixels_mc10_c;
                     -    c->put_tpel_pixels_tab[2]  = put_tpel_pixels_mc20_c;
                     -    c->put_tpel_pixels_tab[4]  = put_tpel_pixels_mc01_c;
                     -    c->put_tpel_pixels_tab[5]  = put_tpel_pixels_mc11_c;
                     -    c->put_tpel_pixels_tab[6]  = put_tpel_pixels_mc21_c;
                     -    c->put_tpel_pixels_tab[8]  = put_tpel_pixels_mc02_c;
                     -    c->put_tpel_pixels_tab[9]  = put_tpel_pixels_mc12_c;
                     -    c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
+                    -
                     -    c->avg_tpel_pixels_tab[0]  = avg_tpel_pixels_mc00_c;
                     -    c->avg_tpel_pixels_tab[1]  = avg_tpel_pixels_mc10_c;
                     -    c->avg_tpel_pixels_tab[2]  = avg_tpel_pixels_mc20_c;
                     -    c->avg_tpel_pixels_tab[4]  = avg_tpel_pixels_mc01_c;
                     -    c->avg_tpel_pixels_tab[5]  = avg_tpel_pixels_mc11_c;
                     -    c->avg_tpel_pixels_tab[6]  = avg_tpel_pixels_mc21_c;
                     -    c->avg_tpel_pixels_tab[8]  = avg_tpel_pixels_mc02_c;
                     -    c->avg_tpel_pixels_tab[9]  = avg_tpel_pixels_mc12_c;
                     -    c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
+                    -
                      #define dspfunc(PFX, IDX, NUM)                              \
                          c->PFX ## _pixels_tab[IDX][0]  = PFX ## NUM ## _mc00_c; \
                          c->PFX ## _pixels_tab[IDX][1]  = PFX ## NUM ## _mc10_c; \

libavcodec/dsputil.h

History View file @ 9d6a27d

@@ -71,9 +71,6 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
                       * Block sizes for op_pixels_func are 8x4,8x8 16x8 16x16.
                       * h for op_pixels_func is limited to { width / 2, width },
                       * but never larger than 16 and never smaller than 4. */
                     -typedef void (*tpel_mc_func)(uint8_t *block /* align width (8 or 16) */,
                     -                             const uint8_t *pixels /* align 1 */,
                     -                             int line_size, int w, int h);
                      typedef void (*qpel_mc_func)(uint8_t *dst /* align width (8 or 16) */,
                                                   uint8_t *src /* align 1 */, ptrdiff_t stride);
@@ -190,19 +187,6 @@ typedef struct DSPContext {
                          int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2,
                                                   int size);
                     -    /**
                     -     * Thirdpel motion compensation with rounding (a + b + 1) >> 1.
                     -     * this is an array[12] of motion compensation functions for the
                     -     * 9 thirdpel positions<br>
                     -     * *pixels_tab[xthirdpel + 4 * ythirdpel]
                     -     * @param block destination where the result is stored
                     -     * @param pixels source
                     -     * @param line_size number of bytes in a horizontal line of block
                     -     * @param h height
                     -     */
                     -    tpel_mc_func put_tpel_pixels_tab[11]; // FIXME individual func ptr per width?
                     -    tpel_mc_func avg_tpel_pixels_tab[11]; // FIXME individual func ptr per width?
+                    -
                          qpel_mc_func put_qpel_pixels_tab[2][16];
                          qpel_mc_func avg_qpel_pixels_tab[2][16];
                          qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];

libavcodec/h264qpel_template.c

History View file @ 9d6a27d

@@ -24,6 +24,7 @@
                      #include "bit_depth_template.c"
                      #include "hpel_template.c"
                     +#include "tpel_template.c"
                      static inline void FUNC(copy_block2)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
                      {

libavcodec/hpel_template.c

History View file @ 9d6a27d

@@ -22,47 +22,6 @@
                      #include "pixels.h"
                      #define DEF_HPEL(OPNAME, OP)                                            \
                     -static inline void FUNCC(OPNAME ## _pixels2)(uint8_t *block,            \
                     -                                             const uint8_t *pixels,     \
                     -                                             ptrdiff_t line_size,       \
                     -                                             int h)                     \
                     -{                                                                       \
                     -    int i;                                                              \
                     -    for (i = 0; i < h; i++) {                                           \
                     -        OP(*((pixel2 *) block), AV_RN2P(pixels));                       \
                     -        pixels += line_size;                                            \
                     -        block  += line_size;                                            \
                     -    }                                                                   \
                     -}                                                                       \
                     -                                                                        \
                     -static inline void FUNCC(OPNAME ## _pixels4)(uint8_t *block,            \
                     -                                             const uint8_t *pixels,     \
                     -                                             ptrdiff_t line_size,       \
                     -                                             int h)                     \
                     -{                                                                       \
                     -    int i;                                                              \
                     -    for (i = 0; i < h; i++) {                                           \
                     -        OP(*((pixel4 *) block), AV_RN4P(pixels));                       \
                     -        pixels += line_size;                                            \
                     -        block  += line_size;                                            \
                     -    }                                                                   \
                     -}                                                                       \
                     -                                                                        \
                     -static inline void FUNCC(OPNAME ## _pixels8)(uint8_t *block,            \
                     -                                             const uint8_t *pixels,     \
                     -                                             ptrdiff_t line_size,       \
                     -                                             int h)                     \
                     -{                                                                       \
                     -    int i;                                                              \
                     -    for (i = 0; i < h; i++) {                                           \
                     -        OP(*((pixel4 *) block), AV_RN4P(pixels));                       \
                     -        OP(*((pixel4 *) (block + 4 * sizeof(pixel))),                   \
                     -           AV_RN4P(pixels + 4 * sizeof(pixel)));                        \
                     -        pixels += line_size;                                            \
                     -        block  += line_size;                                            \
                     -    }                                                                   \
                     -}                                                                       \
                     -                                                                        \
                      static inline void FUNC(OPNAME ## _pixels8_l2)(uint8_t *dst,            \
                                                                     const uint8_t *src1,     \
                                                                     const uint8_t *src2,     \
@@ -134,10 +93,6 @@ static inline void FUNC(OPNAME ## _pixels16_l2)(uint8_t *dst,           \
                                                      dst_stride, src_stride1,                \
                                                      src_stride2, h);                        \
                      }                                                                       \
                     -                                                                        \
                     -CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16),                              \
                     -               FUNCC(OPNAME ## _pixels8),                               \
                     -               8 * sizeof(pixel))
                      #define op_avg(a, b) a = rnd_avg_pixel4(a, b)
                      #define op_put(a, b) a = b

libavcodec/hpeldsp_template.c

History View file @ 9d6a27d

@@ -33,6 +33,7 @@
                      #include "bit_depth_template.c"
                      #include "hpel_template.c"
                     +#include "tpel_template.c"
                      #define PIXOP2(OPNAME, OP)                                              \
                      static inline void FUNC(OPNAME ## _no_rnd_pixels8_l2)(uint8_t *dst,     \

libavcodec/svq3.c

History View file @ 9d6a27d

@@ -54,6 +54,7 @@
                      #include "golomb.h"
                      #include "hpeldsp.h"
                      #include "rectangle.h"
                     +#include "tpeldsp.h"
                      #include "vdpau_internal.h"
                      #if CONFIG_ZLIB
@@ -71,6 +72,7 @@
                      typedef struct {
                          H264Context h;
                          HpelDSPContext hdsp;
                     +    TpelDSPContext tdsp;
                          H264Picture *cur_pic;
                          H264Picture *next_pic;
                          H264Picture *last_pic;
@@ -328,9 +330,9 @@ static inline void svq3_mc_dir_part(SVQ3Context *s,
                              src = h->edge_emu_buffer;
                          }
                          if (thirdpel)
                     -        (avg ? h->dsp.avg_tpel_pixels_tab
                     -             : h->dsp.put_tpel_pixels_tab)[dxy](dest, src, h->linesize,
                     -                                                width, height);
                     +        (avg ? s->tdsp.avg_tpel_pixels_tab
                     +             : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src, h->linesize,
                     +                                                 width, height);
                          else
                              (avg ? s->hdsp.avg_pixels_tab
                                   : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, h->linesize,
@@ -356,10 +358,10 @@ static inline void svq3_mc_dir_part(SVQ3Context *s,
                                      src = h->edge_emu_buffer;
                                  }
                                  if (thirdpel)
                     -                (avg ? h->dsp.avg_tpel_pixels_tab
                     -                     : h->dsp.put_tpel_pixels_tab)[dxy](dest, src,
                     -                                                        h->uvlinesize,
                     -                                                        width, height);
                     +                (avg ? s->tdsp.avg_tpel_pixels_tab
                     +                     : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src,
                     +                                                         h->uvlinesize,
                     +                                                         width, height);
                                  else
                                      (avg ? s->hdsp.avg_pixels_tab
                                           : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src,
@@ -887,6 +889,8 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx)
                              goto fail;
                          ff_hpeldsp_init(&s->hdsp, avctx->flags);
                     +    ff_tpeldsp_init(&s->tdsp);
+                    +
                          h->flags           = avctx->flags;
                          h->is_complex      = 1;
                          h->sps.chroma_format_idc = 1;

libavcodec/tpel_template.c

History View file @ 9d6a27d

                     new file mode 100644
@@ -0,0 +1,80 @@
                     +/*
                     + * This file is part of FFmpeg.
                     + *
                     + * FFmpeg is free software; you can redistribute it and/or
                     + * modify it under the terms of the GNU Lesser General Public
                     + * License as published by the Free Software Foundation; either
                     + * version 2.1 of the License, or (at your option) any later version.
                     + *
                     + * FFmpeg is distributed in the hope that it will be useful,
                     + * but WITHOUT ANY WARRANTY; without even the implied warranty of
                     + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     + * Lesser General Public License for more details.
                     + *
                     + * You should have received a copy of the GNU Lesser General Public
                     + * License along with FFmpeg; if not, write to the Free Software
                     + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                     + */
+                    +
                     +#include <stddef.h>
                     +#include <stdint.h>
+                    +
                     +#include "libavutil/intreadwrite.h"
                     +#include "pixels.h"
                     +#include "rnd_avg.h"
+                    +
                     +#include "bit_depth_template.c"
+                    +
                     +#define DEF_TPEL(OPNAME, OP)                                            \
                     +static inline void FUNCC(OPNAME ## _pixels2)(uint8_t *block,            \
                     +                                             const uint8_t *pixels,     \
                     +                                             ptrdiff_t line_size,       \
                     +                                             int h)                     \
                     +{                                                                       \
                     +    int i;                                                              \
                     +    for (i = 0; i < h; i++) {                                           \
                     +        OP(*((pixel2 *) block), AV_RN2P(pixels));                       \
                     +        pixels += line_size;                                            \
                     +        block  += line_size;                                            \
                     +    }                                                                   \
                     +}                                                                       \
                     +                                                                        \
                     +static inline void FUNCC(OPNAME ## _pixels4)(uint8_t *block,            \
                     +                                             const uint8_t *pixels,     \
                     +                                             ptrdiff_t line_size,       \
                     +                                             int h)                     \
                     +{                                                                       \
                     +    int i;                                                              \
                     +    for (i = 0; i < h; i++) {                                           \
                     +        OP(*((pixel4 *) block), AV_RN4P(pixels));                       \
                     +        pixels += line_size;                                            \
                     +        block  += line_size;                                            \
                     +    }                                                                   \
                     +}                                                                       \
                     +                                                                        \
                     +static inline void FUNCC(OPNAME ## _pixels8)(uint8_t *block,            \
                     +                                             const uint8_t *pixels,     \
                     +                                             ptrdiff_t line_size,       \
                     +                                             int h)                     \
                     +{                                                                       \
                     +    int i;                                                              \
                     +    for (i = 0; i < h; i++) {                                           \
                     +        OP(*((pixel4 *) block), AV_RN4P(pixels));                       \
                     +        OP(*((pixel4 *) (block + 4 * sizeof(pixel))),                   \
                     +           AV_RN4P(pixels + 4 * sizeof(pixel)));                        \
                     +        pixels += line_size;                                            \
                     +        block  += line_size;                                            \
                     +    }                                                                   \
                     +}                                                                       \
                     +                                                                        \
                     +CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16),                              \
                     +               FUNCC(OPNAME ## _pixels8),                               \
                     +               8 * sizeof(pixel))
+                    +
                     +#define op_avg(a, b) a = rnd_avg_pixel4(a, b)
                     +#define op_put(a, b) a = b
+                    +
                     +DEF_TPEL(avg, op_avg)
                     +DEF_TPEL(put, op_put)
                     +#undef op_avg
                     +#undef op_put

libavcodec/tpeldsp.c

History View file @ 9d6a27d

                     new file mode 100644
@@ -0,0 +1,333 @@
                     +/*
                     + * thirdpel DSP functions
                     + *
                     + * This file is part of FFmpeg.
                     + *
                     + * FFmpeg is free software; you can redistribute it and/or
                     + * modify it under the terms of the GNU Lesser General Public
                     + * License as published by the Free Software Foundation; either
                     + * version 2.1 of the License, or (at your option) any later version.
                     + *
                     + * FFmpeg is distributed in the hope that it will be useful,
                     + * but WITHOUT ANY WARRANTY; without even the implied warranty of
                     + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     + * Lesser General Public License for more details.
                     + *
                     + * You should have received a copy of the GNU Lesser General Public
                     + * License along with FFmpeg; if not, write to the Free Software
                     + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                     + */
+                    +
                     +/**
                     + * @file
                     + * thirdpel DSP functions
                     + */
+                    +
                     +#include <stdint.h>
+                    +
                     +#include "libavutil/attributes.h"
                     +#include "tpeldsp.h"
+                    +
                     +#define BIT_DEPTH 8
                     +#include "tpel_template.c"
+                    +
                     +static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
                     +                                          int stride, int width, int height)
                     +{
                     +    switch (width) {
                     +    case 2:
                     +        put_pixels2_8_c(dst, src, stride, height);
                     +        break;
                     +    case 4:
                     +        put_pixels4_8_c(dst, src, stride, height);
                     +        break;
                     +    case 8:
                     +        put_pixels8_8_c(dst, src, stride, height);
                     +        break;
                     +    case 16:
                     +        put_pixels16_8_c(dst, src, stride, height);
                     +        break;
                     +    }
                     +}
+                    +
                     +static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
                     +                                          int stride, int width, int height)
                     +{
                     +    int i, j;
+                    +
                     +    for (i = 0; i < height; i++) {
                     +        for (j = 0; j < width; j++)
                     +            dst[j] = ((2 * src[j] + src[j + 1] + 1) *
                     +                      683) >> 11;
                     +        src += stride;
                     +        dst += stride;
                     +    }
                     +}
+                    +
                     +static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
                     +                                          int stride, int width, int height)
                     +{
                     +    int i, j;
+                    +
                     +    for (i = 0; i < height; i++) {
                     +        for (j = 0; j < width; j++)
                     +            dst[j] = ((src[j] + 2 * src[j + 1] + 1) *
                     +                      683) >> 11;
                     +        src += stride;
                     +        dst += stride;
                     +    }
                     +}
+                    +
                     +static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
                     +                                          int stride, int width, int height)
                     +{
                     +    int i, j;
+                    +
                     +    for (i = 0; i < height; i++) {
                     +        for (j = 0; j < width; j++)
                     +            dst[j] = ((2 * src[j] + src[j + stride] + 1) *
                     +                      683) >> 11;
                     +        src += stride;
                     +        dst += stride;
                     +    }
                     +}
+                    +
                     +static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
                     +                                          int stride, int width, int height)
                     +{
                     +    int i, j;
+                    +
                     +    for (i = 0; i < height; i++) {
                     +        for (j = 0; j < width; j++)
                     +            dst[j] = ((4 * src[j]          + 3 * src[j + 1] +
                     +                       3 * src[j + stride] + 2 * src[j + stride + 1] + 6) *
                     +                      2731) >> 15;
                     +        src += stride;
                     +        dst += stride;
                     +    }
                     +}
+                    +
                     +static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
                     +                                          int stride, int width, int height)
                     +{
                     +    int i, j;
+                    +
                     +    for (i = 0; i < height; i++) {
                     +        for (j = 0; j < width; j++)
                     +            dst[j] = ((3 * src[j]          + 2 * src[j + 1] +
                     +                       4 * src[j + stride] + 3 * src[j + stride + 1] + 6) *
                     +                      2731) >> 15;
                     +        src += stride;
                     +        dst += stride;
                     +    }
                     +}
+                    +
                     +static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
                     +                                          int stride, int width, int height)
                     +{
                     +    int i, j;
+                    +
                     +    for (i = 0; i < height; i++) {
                     +        for (j = 0; j < width; j++)
                     +            dst[j] = ((src[j] + 2 * src[j + stride] + 1) *
                     +                      683) >> 11;
                     +        src += stride;
                     +        dst += stride;
                     +    }
                     +}
+                    +
                     +static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
                     +                                          int stride, int width, int height)
                     +{
                     +    int i, j;
+                    +
                     +    for (i = 0; i < height; i++) {
                     +        for (j = 0; j < width; j++)
                     +            dst[j] = ((3 * src[j]          + 4 * src[j + 1] +
                     +                       2 * src[j + stride] + 3 * src[j + stride + 1] + 6) *
                     +                      2731) >> 15;
                     +        src += stride;
                     +        dst += stride;
                     +    }
                     +}
+                    +
                     +static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
                     +                                          int stride, int width, int height)
                     +{
                     +    int i, j;
+                    +
                     +    for (i = 0; i < height; i++) {
                     +        for (j = 0; j < width; j++)
                     +            dst[j] = ((2 * src[j]          + 3 * src[j + 1] +
                     +                       3 * src[j + stride] + 4 * src[j + stride + 1] + 6) *
                     +                      2731) >> 15;
                     +        src += stride;
                     +        dst += stride;
                     +    }
                     +}
+                    +
                     +static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
                     +                                          int stride, int width, int height)
                     +{
                     +    switch (width) {
                     +    case 2:
                     +        avg_pixels2_8_c(dst, src, stride, height);
                     +        break;
                     +    case 4:
                     +        avg_pixels4_8_c(dst, src, stride, height);
                     +        break;
                     +    case 8:
                     +        avg_pixels8_8_c(dst, src, stride, height);
                     +        break;
                     +    case 16:
                     +        avg_pixels16_8_c(dst, src, stride, height);
                     +        break;
                     +    }
                     +}
+                    +
                     +static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
                     +                                          int stride, int width, int height)
                     +{
                     +    int i, j;
+                    +
                     +    for (i = 0; i < height; i++) {
                     +        for (j = 0; j < width; j++)
                     +            dst[j] = (dst[j] +
                     +                      (((2 * src[j] + src[j + 1] + 1) *
                     +                        683) >> 11) + 1) >> 1;
                     +        src += stride;
                     +        dst += stride;
                     +    }
                     +}
+                    +
                     +static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
                     +                                          int stride, int width, int height)
                     +{
                     +    int i, j;
+                    +
                     +    for (i = 0; i < height; i++) {
                     +        for (j = 0; j < width; j++)
                     +            dst[j] = (dst[j] +
                     +                      (((src[j] + 2 * src[j + 1] + 1) *
                     +                        683) >> 11) + 1) >> 1;
                     +        src += stride;
                     +        dst += stride;
                     +    }
                     +}
+                    +
                     +static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
                     +                                          int stride, int width, int height)
                     +{
                     +    int i, j;
+                    +
                     +    for (i = 0; i < height; i++) {
                     +        for (j = 0; j < width; j++)
                     +            dst[j] = (dst[j] +
                     +                      (((2 * src[j] + src[j + stride] + 1) *
                     +                        683) >> 11) + 1) >> 1;
                     +        src += stride;
                     +        dst += stride;
                     +    }
                     +}
+                    +
                     +static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
                     +                                          int stride, int width, int height)
                     +{
                     +    int i, j;
+                    +
                     +    for (i = 0; i < height; i++) {
                     +        for (j = 0; j < width; j++)
                     +            dst[j] = (dst[j] +
                     +                      (((4 * src[j]          + 3 * src[j + 1] +
                     +                         3 * src[j + stride] + 2 * src[j + stride + 1] + 6) *
                     +                        2731) >> 15) + 1) >> 1;
                     +        src += stride;
                     +        dst += stride;
                     +    }
                     +}
+                    +
                     +static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
                     +                                          int stride, int width, int height)
                     +{
                     +    int i, j;
+                    +
                     +    for (i = 0; i < height; i++) {
                     +        for (j = 0; j < width; j++)
                     +            dst[j] = (dst[j] +
                     +                      (((3 * src[j]          + 2 * src[j + 1] +
                     +                         4 * src[j + stride] + 3 * src[j + stride + 1] + 6) *
                     +                        2731) >> 15) + 1) >> 1;
                     +        src += stride;
                     +        dst += stride;
                     +    }
                     +}
+                    +
                     +static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
                     +                                          int stride, int width, int height)
                     +{
                     +    int i, j;
+                    +
                     +    for (i = 0; i < height; i++) {
                     +        for (j = 0; j < width; j++)
                     +            dst[j] = (dst[j] +
                     +                      (((src[j] + 2 * src[j + stride] + 1) *
                     +                        683) >> 11) + 1) >> 1;
                     +        src += stride;
                     +        dst += stride;
                     +    }
                     +}
+                    +
                     +static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
                     +                                          int stride, int width, int height)
                     +{
                     +    int i, j;
+                    +
                     +    for (i = 0; i < height; i++) {
                     +        for (j = 0; j < width; j++)
                     +            dst[j] = (dst[j] +
                     +                      (((3 * src[j]          + 4 * src[j + 1] +
                     +                         2 * src[j + stride] + 3 * src[j + stride + 1] + 6) *
                     +                        2731) >> 15) + 1) >> 1;
                     +        src += stride;
                     +        dst += stride;
                     +    }
                     +}
+                    +
                     +static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
                     +                                          int stride, int width, int height)
                     +{
                     +    int i, j;
+                    +
                     +    for (i = 0; i < height; i++) {
                     +        for (j = 0; j < width; j++)
                     +            dst[j] = (dst[j] +
                     +                      (((2 * src[j]          + 3 * src[j + 1] +
                     +                         3 * src[j + stride] + 4 * src[j + stride + 1] + 6) *
                     +                        2731) >> 15) + 1) >> 1;
                     +        src += stride;
                     +        dst += stride;
                     +    }
                     +}
+                    +
                     +av_cold void ff_tpeldsp_init(TpelDSPContext *c)
                     +{
                     +    c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
                     +    c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
                     +    c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
                     +    c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
                     +    c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
                     +    c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
                     +    c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
                     +    c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
                     +    c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
+                    +
                     +    c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
                     +    c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
                     +    c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
                     +    c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
                     +    c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
                     +    c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
                     +    c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
                     +    c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
                     +    c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
                     +}

libavcodec/tpeldsp.h

History View file @ 9d6a27d

                     new file mode 100644
@@ -0,0 +1,59 @@
                     +/*
                     + * thirdpel DSP functions
                     + *
                     + * This file is part of FFmpeg.
                     + *
                     + * FFmpeg is free software; you can redistribute it and/or
                     + * modify it under the terms of the GNU Lesser General Public
                     + * License as published by the Free Software Foundation; either
                     + * version 2.1 of the License, or (at your option) any later version.
                     + *
                     + * FFmpeg is distributed in the hope that it will be useful,
                     + * but WITHOUT ANY WARRANTY; without even the implied warranty of
                     + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     + * Lesser General Public License for more details.
                     + *
                     + * You should have received a copy of the GNU Lesser General Public
                     + * License along with FFmpeg; if not, write to the Free Software
                     + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                     + */
+                    +
                     +/**
                     + * @file
                     + * thirdpel DSP functions
                     + */
+                    +
                     +#ifndef AVCODEC_TPELDSP_H
                     +#define AVCODEC_TPELDSP_H
+                    +
                     +#include <stdint.h>
+                    +
                     +/* add and put pixel (decoding) */
                     +// blocksizes for hpel_pixels_func are 8x4,8x8 16x8 16x16
                     +// h for hpel_pixels_func is limited to {width/2, width} but never larger
                     +// than 16 and never smaller than 4
                     +typedef void (*tpel_mc_func)(uint8_t *block /* align width (8 or 16) */,
                     +                             const uint8_t *pixels /* align 1 */,
                     +                             int line_size, int w, int h);
+                    +
                     +/**
                     + * thirdpel DSP context
                     + */
                     +typedef struct TpelDSPContext {
                     +    /**
                     +     * Thirdpel motion compensation with rounding (a + b + 1) >> 1.
                     +     * This is an array[11] of motion compensation functions for the
                     +     * 9 thirdpel positions: *pixels_tab[xthirdpel + 4 * ythirdpel]
                     +     * @param block destination where the result is stored
                     +     * @param pixels source
                     +     * @param line_size number of bytes in a horizontal line of block
                     +     * @param w width
                     +     * @param h height
                     +     */
                     +    tpel_mc_func put_tpel_pixels_tab[11]; // FIXME individual func ptr per width?
                     +    tpel_mc_func avg_tpel_pixels_tab[11]; // FIXME individual func ptr per width?
                     +} TpelDSPContext;
+                    +
                     +void ff_tpeldsp_init(TpelDSPContext *c);
+                    +
                     +#endif /* AVCODEC_TPELDSP_H */