GitList

Browse code

dsputil: Split off pixel block routines into their own context

Diego Biurrun authored on 2014/02/04 07:29:09
Showing 27 changed files

configure index dd421ad..7a29e82 100755
libavcodec/Makefile index 7fa15d5..632b0da 100644
libavcodec/arm/Makefile index 6c90e13..9ba6c20 100644
libavcodec/arm/dsputil_armv6.S index 8ff0e4a..436e20d 100644
libavcodec/arm/dsputil_init_armv6.c index 85ee8e1..bb93d66 100644
libavcodec/arm/pixblockdsp_armv6.S index 0000000..4c925a4
libavcodec/arm/pixblockdsp_init_arm.c index 0000000..f20769b
libavcodec/asv.h index 71d2583..9ae3737 100644
libavcodec/asvenc.c index 1a7021f..9944ffa 100644
libavcodec/dnxhdenc.c index 6ba0e12..e656b6e 100644
libavcodec/dsputil.c index 0dbd910..8d0cef2 100644
libavcodec/dsputil.h index e640f3a..b189305 100644
libavcodec/dsputilenc_template.c index 94e0d7c..0000000
libavcodec/dvenc.c index a4e5376..9f458e3 100644
libavcodec/mpegvideo.h index b2da21d..b498d50 100644
libavcodec/mpegvideo_enc.c index 0cf7ed4..e2504c7 100644
libavcodec/pixblockdsp.c index 0000000..71423f9
libavcodec/pixblockdsp.h index 0000000..8094d14
libavcodec/pixblockdsp_template.c index 0000000..71d3cf1
libavcodec/ppc/Makefile index 5118b9e..64938b6 100644
libavcodec/ppc/dsputil_altivec.c index efb4a18..2dfe17b 100644
libavcodec/ppc/pixblockdsp.c index 0000000..698d655
libavcodec/x86/Makefile index afb26d0..7c5ac3d 100644
libavcodec/x86/dsputilenc.asm index d996962..8d989c2 100644
libavcodec/x86/dsputilenc_mmx.c index fb649e4..5a7d911 100644
libavcodec/x86/pixblockdsp.asm index 0000000..c8fd1b2
libavcodec/x86/pixblockdsp_init.c index 0000000..9582e0b

@@ -1558,6 +1558,7 @@ CONFIG_EXTRA="
                          mpegvideo
                          mpegvideoenc
                          nettle
                     +    pixblockdsp
                          qpeldsp
                          rangecoder
                          riffdec
@@ -1706,7 +1707,7 @@ threads_if_any="$THREADS_LIST"
                      # subsystems
                      dct_select="rdft"
                     -dsputil_select="fdctdsp idctdsp"
                     +dsputil_select="fdctdsp idctdsp pixblockdsp"
                      error_resilience_select="dsputil"
                      intrax8_select="error_resilience"
                      mdct_select="fft"
@@ -1715,7 +1716,7 @@ mpeg_er_select="error_resilience"
                      mpegaudio_select="mpegaudiodsp"
                      mpegaudiodsp_select="dct"
                      mpegvideo_select="blockdsp dsputil hpeldsp idctdsp videodsp"
                     -mpegvideoenc_select="dsputil mpegvideo qpeldsp"
                     +mpegvideoenc_select="dsputil mpegvideo pixblockdsp qpeldsp"
                      # decoders / encoders
                      aac_decoder_select="mdct sinewin"
@@ -1732,9 +1733,9 @@ amrwb_decoder_select="lsp"
                      amv_decoder_select="sp5x_decoder"
                      ape_decoder_select="bswapdsp"
                      asv1_decoder_select="blockdsp bswapdsp idctdsp"
                     -asv1_encoder_select="bswapdsp dsputil fdctdsp"
                     +asv1_encoder_select="bswapdsp fdctdsp pixblockdsp"
                      asv2_decoder_select="blockdsp bswapdsp idctdsp"
                     -asv2_encoder_select="bswapdsp dsputil fdctdsp"
                     +asv2_encoder_select="bswapdsp fdctdsp pixblockdsp"
                      atrac1_decoder_select="mdct sinewin"
                      atrac3_decoder_select="mdct"
                      atrac3p_decoder_select="mdct sinewin"
@@ -1749,9 +1750,9 @@ cscd_decoder_select="lzo"
                      cscd_decoder_suggest="zlib"
                      dca_decoder_select="mdct"
                      dnxhd_decoder_select="blockdsp idctdsp"
                     -dnxhd_encoder_select="aandcttables blockdsp dsputil fdctdsp idctdsp mpegvideoenc"
                     +dnxhd_encoder_select="aandcttables blockdsp fdctdsp idctdsp mpegvideoenc pixblockdsp"
                      dvvideo_decoder_select="dvprofile idctdsp"
                     -dvvideo_encoder_select="dsputil dvprofile fdctdsp"
                     +dvvideo_encoder_select="dsputil dvprofile fdctdsp pixblockdsp"
                      dxa_decoder_deps="zlib"
                      eac3_decoder_select="ac3_decoder"
                      eac3_encoder_select="ac3_encoder"

libavcodec/Makefile

History View file @ f46bb60

@@ -72,6 +72,7 @@ OBJS-$(CONFIG_MPEGVIDEO)               += mpegvideo.o mpegvideodsp.o    \
                      OBJS-$(CONFIG_MPEGVIDEOENC)            += mpegvideo_enc.o mpeg12data.o  \
                                                                motion_est.o ratecontrol.o    \
                                                                mpegvideoencdsp.o
                     +OBJS-$(CONFIG_PIXBLOCKDSP)             += pixblockdsp.o
                      OBJS-$(CONFIG_QPELDSP)                 += qpeldsp.o
                      OBJS-$(CONFIG_RANGECODER)              += rangecoder.o
                      RDFT-OBJS-$(CONFIG_HARDCODED_TABLES)   += sin_tables.o

libavcodec/arm/Makefile

History View file @ f46bb60

@@ -23,6 +23,7 @@ OBJS-$(CONFIG_MPEGAUDIODSP)            += arm/mpegaudiodsp_init_arm.o
                      OBJS-$(CONFIG_MPEGVIDEO)               += arm/mpegvideo_arm.o
                      OBJS-$(CONFIG_MPEGVIDEOENC)            += arm/mpegvideoencdsp_init_arm.o
                      OBJS-$(CONFIG_NEON_CLOBBER_TEST)       += arm/neontest.o
                     +OBJS-$(CONFIG_PIXBLOCKDSP)             += arm/pixblockdsp_init_arm.o
                      OBJS-$(CONFIG_VIDEODSP)                += arm/videodsp_init_arm.o
                      OBJS-$(CONFIG_VP3DSP)                  += arm/vp3dsp_init_arm.o
@@ -62,6 +63,7 @@ ARMV6-OBJS-$(CONFIG_IDCTDSP)           += arm/idctdsp_init_armv6.o      \
                                                                arm/simple_idct_armv6.o
                      ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP)      += arm/mpegaudiodsp_fixed_armv6.o
                      ARMV6-OBJS-$(CONFIG_MPEGVIDEOENC)      += arm/mpegvideoencdsp_armv6.o
                     +ARMV6-OBJS-$(CONFIG_PIXBLOCKDSP)       += arm/pixblockdsp_armv6.o
                      ARMV6-OBJS-$(CONFIG_MLP_DECODER)       += arm/mlpdsp_armv6.o
                      ARMV6-OBJS-$(CONFIG_VP7_DECODER)       += arm/vp8_armv6.o               \

libavcodec/arm/dsputil_armv6.S

History View file @ f46bb60

@@ -20,61 +20,6 @@
                      #include "libavutil/arm/asm.S"
                     -function ff_get_pixels_armv6, export=1
                     -        pld             [r1, r2]
                     -        push            {r4-r8, lr}
                     -        mov             lr,  #8
                     -1:
                     -        ldrd_post       r4,  r5,  r1,  r2
                     -        subs            lr,  lr,  #1
                     -        uxtb16          r6,  r4
                     -        uxtb16          r4,  r4,  ror #8
                     -        uxtb16          r12, r5
                     -        uxtb16          r8,  r5,  ror #8
                     -        pld             [r1, r2]
                     -        pkhbt           r5,  r6,  r4,  lsl #16
                     -        pkhtb           r6,  r4,  r6,  asr #16
                     -        pkhbt           r7,  r12, r8,  lsl #16
                     -        pkhtb           r12, r8,  r12, asr #16
                     -        stm             r0!, {r5,r6,r7,r12}
                     -        bgt             1b
                     -
                     -        pop             {r4-r8, pc}
                     -endfunc
                     -
                     -function ff_diff_pixels_armv6, export=1
                     -        pld             [r1, r3]
                     -        pld             [r2, r3]
                     -        push            {r4-r9, lr}
                     -        mov             lr,  #8
                     -1:
                     -        ldrd_post       r4,  r5,  r1,  r3
                     -        ldrd_post       r6,  r7,  r2,  r3
                     -        uxtb16          r8,  r4
                     -        uxtb16          r4,  r4,  ror #8
                     -        uxtb16          r9,  r6
                     -        uxtb16          r6,  r6,  ror #8
                     -        pld             [r1, r3]
                     -        ssub16          r9,  r8,  r9
                     -        ssub16          r6,  r4,  r6
                     -        uxtb16          r8,  r5
                     -        uxtb16          r5,  r5,  ror #8
                     -        pld             [r2, r3]
                     -        pkhbt           r4,  r9,  r6,  lsl #16
                     -        pkhtb           r6,  r6,  r9,  asr #16
                     -        uxtb16          r9,  r7
                     -        uxtb16          r7,  r7,  ror #8
                     -        ssub16          r9,  r8,  r9
                     -        ssub16          r5,  r5,  r7
                     -        subs            lr,  lr,  #1
                     -        pkhbt           r8,  r9,  r5,  lsl #16
                     -        pkhtb           r9,  r5,  r9,  asr #16
                     -        stm             r0!, {r4,r6,r8,r9}
                     -        bgt             1b
                     -
                     -        pop             {r4-r9, pc}
                     -endfunc
                     -
                      function ff_pix_abs16_armv6, export=1
                              ldr             r0,  [sp]
                              push            {r4-r9, lr}

libavcodec/arm/dsputil_init_armv6.c

History View file @ f46bb60

@@ -26,10 +26,6 @@
                      #include "libavcodec/mpegvideo.h"
                      #include "dsputil_arm.h"
                     -void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, int stride);
                     -void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1,
                     -                          const uint8_t *s2, int stride);
                     -
                      int ff_pix_abs16_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2,
                                             int line_size, int h);
                      int ff_pix_abs16_x2_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2,
@@ -46,10 +42,6 @@ int ff_sse16_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2,
                      av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx,
                                                         unsigned high_bit_depth)
                      {
                     -    if (!high_bit_depth)
                     -        c->get_pixels = ff_get_pixels_armv6;
                     -    c->diff_pixels = ff_diff_pixels_armv6;
                     -
                          c->pix_abs[0][0] = ff_pix_abs16_armv6;
                          c->pix_abs[0][1] = ff_pix_abs16_x2_armv6;
                          c->pix_abs[0][2] = ff_pix_abs16_y2_armv6;

libavcodec/arm/pixblockdsp_armv6.S

History View file @ f46bb60

                     new file mode 100644
@@ -0,0 +1,76 @@
                     +/*
                     + * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
                     + *
                     + * This file is part of Libav.
                     + *
                     + * Libav is free software; you can redistribute it and/or
                     + * modify it under the terms of the GNU Lesser General Public
                     + * License as published by the Free Software Foundation; either
                     + * version 2.1 of the License, or (at your option) any later version.
                     + *
                     + * Libav is distributed in the hope that it will be useful,
                     + * but WITHOUT ANY WARRANTY; without even the implied warranty of
                     + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     + * Lesser General Public License for more details.
                     + *
                     + * You should have received a copy of the GNU Lesser General Public
                     + * License along with Libav; if not, write to the Free Software
                     + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                     + */
                     +
                     +#include "libavutil/arm/asm.S"
                     +
                     +function ff_get_pixels_armv6, export=1
                     +        pld             [r1, r2]
                     +        push            {r4-r8, lr}
                     +        mov             lr,  #8
                     +1:
                     +        ldrd_post       r4,  r5,  r1,  r2
                     +        subs            lr,  lr,  #1
                     +        uxtb16          r6,  r4
                     +        uxtb16          r4,  r4,  ror #8
                     +        uxtb16          r12, r5
                     +        uxtb16          r8,  r5,  ror #8
                     +        pld             [r1, r2]
                     +        pkhbt           r5,  r6,  r4,  lsl #16
                     +        pkhtb           r6,  r4,  r6,  asr #16
                     +        pkhbt           r7,  r12, r8,  lsl #16
                     +        pkhtb           r12, r8,  r12, asr #16
                     +        stm             r0!, {r5,r6,r7,r12}
                     +        bgt             1b
                     +
                     +        pop             {r4-r8, pc}
                     +endfunc
                     +
                     +function ff_diff_pixels_armv6, export=1
                     +        pld             [r1, r3]
                     +        pld             [r2, r3]
                     +        push            {r4-r9, lr}
                     +        mov             lr,  #8
                     +1:
                     +        ldrd_post       r4,  r5,  r1,  r3
                     +        ldrd_post       r6,  r7,  r2,  r3
                     +        uxtb16          r8,  r4
                     +        uxtb16          r4,  r4,  ror #8
                     +        uxtb16          r9,  r6
                     +        uxtb16          r6,  r6,  ror #8
                     +        pld             [r1, r3]
                     +        ssub16          r9,  r8,  r9
                     +        ssub16          r6,  r4,  r6
                     +        uxtb16          r8,  r5
                     +        uxtb16          r5,  r5,  ror #8
                     +        pld             [r2, r3]
                     +        pkhbt           r4,  r9,  r6,  lsl #16
                     +        pkhtb           r6,  r6,  r9,  asr #16
                     +        uxtb16          r9,  r7
                     +        uxtb16          r7,  r7,  ror #8
                     +        ssub16          r9,  r8,  r9
                     +        ssub16          r5,  r5,  r7
                     +        subs            lr,  lr,  #1
                     +        pkhbt           r8,  r9,  r5,  lsl #16
                     +        pkhtb           r9,  r5,  r9,  asr #16
                     +        stm             r0!, {r4,r6,r8,r9}
                     +        bgt             1b
                     +
                     +        pop             {r4-r9, pc}
                     +endfunc

libavcodec/arm/pixblockdsp_init_arm.c

History View file @ f46bb60

                     new file mode 100644
@@ -0,0 +1,42 @@
                     +/*
                     + * This file is part of Libav.
                     + *
                     + * Libav is free software; you can redistribute it and/or
                     + * modify it under the terms of the GNU Lesser General Public
                     + * License as published by the Free Software Foundation; either
                     + * version 2.1 of the License, or (at your option) any later version.
                     + *
                     + * Libav is distributed in the hope that it will be useful,
                     + * but WITHOUT ANY WARRANTY; without even the implied warranty of
                     + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     + * Lesser General Public License for more details.
                     + *
                     + * You should have received a copy of the GNU Lesser General Public
                     + * License along with Libav; if not, write to the Free Software
                     + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                     + */
                     +
                     +#include <stdint.h>
                     +
                     +#include "libavutil/attributes.h"
                     +#include "libavutil/cpu.h"
                     +#include "libavutil/arm/cpu.h"
                     +#include "libavcodec/avcodec.h"
                     +#include "libavcodec/pixblockdsp.h"
                     +
                     +void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, int stride);
                     +void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1,
                     +                          const uint8_t *s2, int stride);
                     +
                     +av_cold void ff_pixblockdsp_init_arm(PixblockDSPContext *c,
                     +                                     AVCodecContext *avctx,
                     +                                     unsigned high_bit_depth)
                     +{
                     +    int cpu_flags = av_get_cpu_flags();
                     +
                     +    if (have_armv6(cpu_flags)) {
                     +        if (!high_bit_depth)
                     +            c->get_pixels = ff_get_pixels_armv6;
                     +        c->diff_pixels = ff_diff_pixels_armv6;
                     +    }
                     +}

libavcodec/asv.h

History View file @ f46bb60

@@ -33,19 +33,19 @@
                      #include "avcodec.h"
                      #include "blockdsp.h"
                      #include "bswapdsp.h"
                     -#include "dsputil.h"
                      #include "fdctdsp.h"
                      #include "idctdsp.h"
                      #include "get_bits.h"
                     +#include "pixblockdsp.h"
                      #include "put_bits.h"
                      typedef struct ASV1Context{
                          AVCodecContext *avctx;
                          BlockDSPContext bdsp;
                          BswapDSPContext bbdsp;
                     -    DSPContext dsp;
                          FDCTDSPContext fdsp;
                          IDCTDSPContext idsp;
                     +    PixblockDSPContext pdsp;
                          PutBitContext pb;
                          GetBitContext gb;
                          ScanTable scantable;

libavcodec/asvenc.c

History View file @ f46bb60

@@ -159,16 +159,16 @@ static inline void dct_get(ASV1Context *a, const AVFrame *frame,
                          uint8_t *ptr_cb = frame->data[1] + (mb_y * 8 * frame->linesize[1]) + mb_x * 8;
                          uint8_t *ptr_cr = frame->data[2] + (mb_y * 8 * frame->linesize[2]) + mb_x * 8;
                     -    a->dsp.get_pixels(block[0], ptr_y                 , linesize);
                     -    a->dsp.get_pixels(block[1], ptr_y              + 8, linesize);
                     -    a->dsp.get_pixels(block[2], ptr_y + 8*linesize    , linesize);
                     -    a->dsp.get_pixels(block[3], ptr_y + 8*linesize + 8, linesize);
                     +    a->pdsp.get_pixels(block[0], ptr_y,                    linesize);
                     +    a->pdsp.get_pixels(block[1], ptr_y + 8,                linesize);
                     +    a->pdsp.get_pixels(block[2], ptr_y + 8 * linesize,     linesize);
                     +    a->pdsp.get_pixels(block[3], ptr_y + 8 * linesize + 8, linesize);
                          for(i=0; i<4; i++)
                              a->fdsp.fdct(block[i]);
                          if(!(a->avctx->flags&CODEC_FLAG_GRAY)){
                     -        a->dsp.get_pixels(block[4], ptr_cb, frame->linesize[1]);
                     -        a->dsp.get_pixels(block[5], ptr_cr, frame->linesize[2]);
                     +        a->pdsp.get_pixels(block[4], ptr_cb, frame->linesize[1]);
                     +        a->pdsp.get_pixels(block[5], ptr_cr, frame->linesize[2]);
                              for(i=4; i<6; i++)
                                  a->fdsp.fdct(block[i]);
                          }
@@ -248,8 +248,8 @@ static av_cold int encode_init(AVCodecContext *avctx){
                          avctx->coded_frame->key_frame = 1;
                          ff_asv_common_init(avctx);
                     -    ff_dsputil_init(&a->dsp, avctx);
                          ff_fdctdsp_init(&a->fdsp, avctx);
                     +    ff_pixblockdsp_init(&a->pdsp, avctx);
                          if(avctx->global_quality == 0) avctx->global_quality= 4*FF_QUALITY_SCALE;

libavcodec/dnxhdenc.c

History View file @ f46bb60

@@ -30,10 +30,10 @@
                      #include "avcodec.h"
                      #include "blockdsp.h"
                     -#include "dsputil.h"
                      #include "fdctdsp.h"
                      #include "internal.h"
                      #include "mpegvideo.h"
                     +#include "pixblockdsp.h"
                      #include "dnxhdenc.h"
                      // The largest value that will not lead to overflow for 10bit samples.
@@ -308,10 +308,10 @@ static av_cold int dnxhd_encode_init(AVCodecContext *avctx)
                          avctx->bits_per_raw_sample = ctx->cid_table->bit_depth;
                          ff_blockdsp_init(&ctx->bdsp, avctx);
                     -    ff_dsputil_init(&ctx->m.dsp, avctx);
                          ff_fdctdsp_init(&ctx->m.fdsp, avctx);
                          ff_idctdsp_init(&ctx->m.idsp, avctx);
                          ff_mpegvideoencdsp_init(&ctx->m.mpvencdsp, avctx);
                     +    ff_pixblockdsp_init(&ctx->m.pdsp, avctx);
                          ff_dct_common_init(&ctx->m);
                          if (!ctx->m.dct_quantize)
                              ctx->m.dct_quantize = ff_dct_quantize_c;
@@ -540,12 +540,12 @@ void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y)
                                                 ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs);
                          const uint8_t *ptr_v = ctx->thread[0]->src[2] +
                                                 ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs);
                     -    DSPContext *dsp = &ctx->m.dsp;
                     +    PixblockDSPContext *pdsp = &ctx->m.pdsp;
                     -    dsp->get_pixels(ctx->blocks[0], ptr_y,      ctx->m.linesize);
                     -    dsp->get_pixels(ctx->blocks[1], ptr_y + bw, ctx->m.linesize);
                     -    dsp->get_pixels(ctx->blocks[2], ptr_u,      ctx->m.uvlinesize);
                     -    dsp->get_pixels(ctx->blocks[3], ptr_v,      ctx->m.uvlinesize);
                     +    pdsp->get_pixels(ctx->blocks[0], ptr_y,      ctx->m.linesize);
                     +    pdsp->get_pixels(ctx->blocks[1], ptr_y + bw, ctx->m.linesize);
                     +    pdsp->get_pixels(ctx->blocks[2], ptr_u,      ctx->m.uvlinesize);
                     +    pdsp->get_pixels(ctx->blocks[3], ptr_v,      ctx->m.uvlinesize);
                          if (mb_y + 1 == ctx->m.mb_height && ctx->m.avctx->height == 1080) {
                              if (ctx->interlaced) {
@@ -568,14 +568,14 @@ void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y)
                                  ctx->bdsp.clear_block(ctx->blocks[7]);
                              }
                          } else {
                     -        dsp->get_pixels(ctx->blocks[4],
                     -                        ptr_y + ctx->dct_y_offset, ctx->m.linesize);
                     -        dsp->get_pixels(ctx->blocks[5],
                     -                        ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize);
                     -        dsp->get_pixels(ctx->blocks[6],
                     -                        ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize);
                     -        dsp->get_pixels(ctx->blocks[7],
                     -                        ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize);
                     +        pdsp->get_pixels(ctx->blocks[4],
                     +                         ptr_y + ctx->dct_y_offset, ctx->m.linesize);
                     +        pdsp->get_pixels(ctx->blocks[5],
                     +                         ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize);
                     +        pdsp->get_pixels(ctx->blocks[6],
                     +                         ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize);
                     +        pdsp->get_pixels(ctx->blocks[7],
                     +                         ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize);
                          }
                      }

libavcodec/dsputil.c

History View file @ f46bb60

@@ -35,13 +35,6 @@
                      uint32_t ff_square_tab[512] = { 0, };
                     -#define BIT_DEPTH 16
                     -#include "dsputilenc_template.c"
                     -#undef BIT_DEPTH
                     -
                     -#define BIT_DEPTH 8
                     -#include "dsputilenc_template.c"
                     -
                      static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                                        int line_size, int h)
                      {
@@ -110,27 +103,6 @@ static int sse16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                          return s;
                      }
                     -static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1,
                     -                          const uint8_t *s2, int stride)
                     -{
                     -    int i;
                     -
                     -    /* read the pixels */
                     -    for (i = 0; i < 8; i++) {
                     -        block[0] = s1[0] - s2[0];
                     -        block[1] = s1[1] - s2[1];
                     -        block[2] = s1[2] - s2[2];
                     -        block[3] = s1[3] - s2[3];
                     -        block[4] = s1[4] - s2[4];
                     -        block[5] = s1[5] - s2[5];
                     -        block[6] = s1[6] - s2[6];
                     -        block[7] = s1[7] - s2[7];
                     -        s1      += stride;
                     -        s2      += stride;
                     -        block   += 8;
                     -    }
                     -}
                     -
                      static int sum_abs_dctelem_c(int16_t *block)
                      {
                          int sum = 0, i;
@@ -577,7 +549,7 @@ static int dct_sad8x8_c(MpegEncContext *s, uint8_t *src1,
                          assert(h == 8);
                     -    s->dsp.diff_pixels(temp, src1, src2, stride);
                     +    s->pdsp.diff_pixels(temp, src1, src2, stride);
                          s->fdsp.fdct(temp);
                          return s->dsp.sum_abs_dctelem(temp);
                      }
@@ -617,7 +589,7 @@ static int dct264_sad8x8_c(MpegEncContext *s, uint8_t *src1,
                          int16_t dct[8][8];
                          int i, sum = 0;
                     -    s->dsp.diff_pixels(dct[0], src1, src2, stride);
                     +    s->pdsp.diff_pixels(dct[0], src1, src2, stride);
                      #define SRC(x) dct[i][x]
                      #define DST(x, v) dct[i][x] = v
@@ -644,7 +616,7 @@ static int dct_max8x8_c(MpegEncContext *s, uint8_t *src1,
                          assert(h == 8);
                     -    s->dsp.diff_pixels(temp, src1, src2, stride);
                     +    s->pdsp.diff_pixels(temp, src1, src2, stride);
                          s->fdsp.fdct(temp);
                          for (i = 0; i < 64; i++)
@@ -663,7 +635,7 @@ static int quant_psnr8x8_c(MpegEncContext *s, uint8_t *src1,
                          assert(h == 8);
                          s->mb_intra = 0;
                     -    s->dsp.diff_pixels(temp, src1, src2, stride);
                     +    s->pdsp.diff_pixels(temp, src1, src2, stride);
                          memcpy(bak, temp, 64 * sizeof(int16_t));
@@ -694,7 +666,7 @@ static int rd8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
                          copy_block8(lsrc1, src1, 8, stride, 8);
                          copy_block8(lsrc2, src2, 8, stride, 8);
                     -    s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
                     +    s->pdsp.diff_pixels(temp, lsrc1, lsrc2, 8);
                          s->block_last_index[0 /* FIXME */] =
                          last                               =
@@ -766,7 +738,7 @@ static int bit8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
                          assert(h == 8);
                     -    s->dsp.diff_pixels(temp, src1, src2, stride);
                     +    s->pdsp.diff_pixels(temp, src1, src2, stride);
                          s->block_last_index[0 /* FIXME */] =
                          last                               =
@@ -932,8 +904,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
                      {
                          const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;
                     -    c->diff_pixels = diff_pixels_c;
                     -
                          c->sum_abs_dctelem = sum_abs_dctelem_c;
                          /* TODO [0] 16  [1] 8 */
@@ -975,16 +945,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
                          c->nsse[0] = nsse16_c;
                          c->nsse[1] = nsse8_c;
                     -    switch (avctx->bits_per_raw_sample) {
                     -    case 9:
                     -    case 10:
                     -        c->get_pixels = get_pixels_16_c;
                     -        break;
                     -    default:
                     -        c->get_pixels = get_pixels_8_c;
                     -        break;
                     -    }
                     -
                          if (ARCH_ARM)
                              ff_dsputil_init_arm(c, avctx, high_bit_depth);
                          if (ARCH_PPC)

libavcodec/dsputil.h

History View file @ f46bb60

@@ -48,14 +48,6 @@ typedef int (*me_cmp_func)(struct MpegEncContext *c,
                       * DSPContext.
                       */
                      typedef struct DSPContext {
                     -    /* pixel ops : interface with DCT */
                     -    void (*get_pixels)(int16_t *block /* align 16 */,
                     -                       const uint8_t *pixels /* align 8 */,
                     -                       int line_size);
                     -    void (*diff_pixels)(int16_t *block /* align 16 */,
                     -                        const uint8_t *s1 /* align 8 */,
                     -                        const uint8_t *s2 /* align 8 */,
                     -                        int stride);
                          int (*sum_abs_dctelem)(int16_t *block /* align 16 */);
                          me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */

libavcodec/dsputilenc_template.c

History View file @ f46bb60

                     deleted file mode 100644
@@ -1,51 +0,0 @@
                     -/*
                     - * DSP utils
                     - * Copyright (c) 2000, 2001 Fabrice Bellard
                     - * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
                     - *
                     - * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
                     - *
                     - * This file is part of Libav.
                     - *
                     - * Libav is free software; you can redistribute it and/or
                     - * modify it under the terms of the GNU Lesser General Public
                     - * License as published by the Free Software Foundation; either
                     - * version 2.1 of the License, or (at your option) any later version.
                     - *
                     - * Libav is distributed in the hope that it will be useful,
                     - * but WITHOUT ANY WARRANTY; without even the implied warranty of
                     - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     - * Lesser General Public License for more details.
                     - *
                     - * You should have received a copy of the GNU Lesser General Public
                     - * License along with Libav; if not, write to the Free Software
                     - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                     - */
+                    -
                     -/**
                     - * @file
                     - * DSP utils
                     - */
+                    -
                     -#include "bit_depth_template.c"
+                    -
                     -static void FUNCC(get_pixels)(int16_t *restrict block, const uint8_t *_pixels,
                     -                              int line_size)
                     -{
                     -    const pixel *pixels = (const pixel *) _pixels;
                     -    int i;
+                    -
                     -    /* read the pixels */
                     -    for (i = 0; i < 8; i++) {
                     -        block[0] = pixels[0];
                     -        block[1] = pixels[1];
                     -        block[2] = pixels[2];
                     -        block[3] = pixels[3];
                     -        block[4] = pixels[4];
                     -        block[5] = pixels[5];
                     -        block[6] = pixels[6];
                     -        block[7] = pixels[7];
                     -        pixels  += line_size / sizeof(pixel);
                     -        block   += 8;
                     -    }
                     -}

libavcodec/dvenc.c

History View file @ f46bb60

@@ -31,6 +31,7 @@
                      #include "dsputil.h"
                      #include "fdctdsp.h"
                      #include "internal.h"
                     +#include "pixblockdsp.h"
                      #include "put_bits.h"
                      #include "dv.h"
                      #include "dv_tablegen.h"
@@ -41,6 +42,7 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx)
                          DVVideoContext *s = avctx->priv_data;
                          DSPContext dsp;
                          FDCTDSPContext fdsp;
                     +    PixblockDSPContext pdsp;
                          int ret;
                          s->sys = avpriv_dv_codec_profile(avctx);
@@ -65,9 +67,10 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx)
                          ff_dsputil_init(&dsp, avctx);
                          ff_fdctdsp_init(&fdsp, avctx);
                     +    ff_pixblockdsp_init(&pdsp, avctx);
                          ff_set_cmp(&dsp, dsp.ildct_cmp, avctx->ildct_cmp);
                     -    s->get_pixels = dsp.get_pixels;
                     +    s->get_pixels = pdsp.get_pixels;
                          s->ildct_cmp  = dsp.ildct_cmp[5];
                          s->fdct[0]    = fdsp.fdct;

libavcodec/mpegvideo.h

History View file @ f46bb60

@@ -39,6 +39,7 @@
                      #include "idctdsp.h"
                      #include "mpegvideodsp.h"
                      #include "mpegvideoencdsp.h"
                     +#include "pixblockdsp.h"
                      #include "put_bits.h"
                      #include "ratecontrol.h"
                      #include "parser.h"
@@ -361,6 +362,7 @@ typedef struct MpegEncContext {
                          IDCTDSPContext idsp;
                          MpegVideoDSPContext mdsp;
                          MpegvideoEncDSPContext mpvencdsp;
                     +    PixblockDSPContext pdsp;
                          QpelDSPContext qdsp;
                          VideoDSPContext vdsp;
                          H263DSPContext h263dsp;

libavcodec/mpegvideo_enc.c

History View file @ f46bb60

@@ -37,7 +37,6 @@
                      #include "libavutil/timer.h"
                      #include "avcodec.h"
                      #include "dct.h"
                     -#include "dsputil.h"
                      #include "idctdsp.h"
                      #include "mpeg12.h"
                      #include "mpegvideo.h"
@@ -48,6 +47,7 @@
                      #include "mpegutils.h"
                      #include "mjpegenc.h"
                      #include "msmpeg4.h"
                     +#include "pixblockdsp.h"
                      #include "qpeldsp.h"
                      #include "faandct.h"
                      #include "thread.h"
@@ -703,6 +703,7 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
                          ff_fdctdsp_init(&s->fdsp, avctx);
                          ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
                     +    ff_pixblockdsp_init(&s->pdsp, avctx);
                          ff_qpeldsp_init(&s->qdsp);
                          s->avctx->coded_frame = s->current_picture.f;
@@ -1943,22 +1944,22 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
+                                 }
+                             }
                     -        s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
                     -        s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
                     -        s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
                     -        s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
                     +        s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
                     +        s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
                     +        s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
                     +        s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
                              if (s->flags & CODEC_FLAG_GRAY) {
                                  skip_dct[4] = 1;
                                  skip_dct[5] = 1;
                              } else {
                     -            s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
                     -            s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
                     +            s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
                     +            s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
                                  if (!s->chroma_y_shift) { /* 422 */
                     -                s->dsp.get_pixels(s->block[6],
                     -                                  ptr_cb + (dct_offset >> 1), wrap_c);
                     -                s->dsp.get_pixels(s->block[7],
                     -                                  ptr_cr + (dct_offset >> 1), wrap_c);
                     +                s->pdsp.get_pixels(s->block[6],
                     +                                   ptr_cb + (dct_offset >> 1), wrap_c);
                     +                s->pdsp.get_pixels(s->block[7],
                     +                                   ptr_cr + (dct_offset >> 1), wrap_c);
+                                 }
+                             }
                          } else {
@@ -2024,24 +2025,24 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
+                                 }
+                             }
                     -        s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
                     -        s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
                     -        s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
                     -                           dest_y + dct_offset, wrap_y);
                     -        s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
                     -                           dest_y + dct_offset + 8, wrap_y);
                     +        s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
                     +        s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
                     +        s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
                     +                            dest_y + dct_offset, wrap_y);
                     +        s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
                     +                            dest_y + dct_offset + 8, wrap_y);
                              if (s->flags & CODEC_FLAG_GRAY) {
                                  skip_dct[4] = 1;
                                  skip_dct[5] = 1;
                              } else {
                     -            s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
                     -            s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
                     +            s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
                     +            s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
                                  if (!s->chroma_y_shift) { /* 422 */
                     -                s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
                     -                                   dest_cb + (dct_offset >> 1), wrap_c);
                     -                s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
                     -                                   dest_cr + (dct_offset >> 1), wrap_c);
                     +                s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
                     +                                    dest_cb + (dct_offset >> 1), wrap_c);
                     +                s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
                     +                                    dest_cr + (dct_offset >> 1), wrap_c);
+                                 }
+                             }
                              /* pre quantization */

libavcodec/pixblockdsp.c

History View file @ f46bb60

                     new file mode 100644
@@ -0,0 +1,76 @@
                     +/*
                     + * This file is part of Libav.
                     + *
                     + * Libav is free software; you can redistribute it and/or
                     + * modify it under the terms of the GNU Lesser General Public
                     + * License as published by the Free Software Foundation; either
                     + * version 2.1 of the License, or (at your option) any later version.
                     + *
                     + * Libav is distributed in the hope that it will be useful,
                     + * but WITHOUT ANY WARRANTY; without even the implied warranty of
                     + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     + * Lesser General Public License for more details.
                     + *
                     + * You should have received a copy of the GNU Lesser General Public
                     + * License along with Libav; if not, write to the Free Software
                     + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                     + */
+                    +
                     +#include <stdint.h>
+                    +
                     +#include "config.h"
                     +#include "libavutil/attributes.h"
                     +#include "avcodec.h"
                     +#include "pixblockdsp.h"
+                    +
                     +#define BIT_DEPTH 16
                     +#include "pixblockdsp_template.c"
                     +#undef BIT_DEPTH
+                    +
                     +#define BIT_DEPTH 8
                     +#include "pixblockdsp_template.c"
+                    +
                     +static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1,
                     +                          const uint8_t *s2, int stride)
                     +{
                     +    int i;
+                    +
                     +    /* store the per-pixel difference s1 - s2: 8 rows of 8 values each */
                     +    for (i = 0; i < 8; i++) {
                     +        block[0] = s1[0] - s2[0];
                     +        block[1] = s1[1] - s2[1];
                     +        block[2] = s1[2] - s2[2];
                     +        block[3] = s1[3] - s2[3];
                     +        block[4] = s1[4] - s2[4];
                     +        block[5] = s1[5] - s2[5];
                     +        block[6] = s1[6] - s2[6];
                     +        block[7] = s1[7] - s2[7];
                     +        s1      += stride; /* both inputs step by the same stride */
                     +        s2      += stride;
                     +        block   += 8; /* output rows are stored back to back */
                     +    }
                     +}
+                    +
                     +av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx)
                     +{
                     +    const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8; /* only forwarded to the per-arch init calls below */
+                    +
                     +    c->diff_pixels = diff_pixels_c; /* C implementation, set unconditionally */
+                    +
                     +    switch (avctx->bits_per_raw_sample) { /* choose the C get_pixels variant by bit depth */
                     +    case 9:
                     +    case 10:
                     +        c->get_pixels = get_pixels_16_c;
                     +        break;
                     +    default: /* NOTE(review): depths above 10 also land here although high_bit_depth is set - confirm intended */
                     +        c->get_pixels = get_pixels_8_c;
                     +        break;
                     +    }
+                    +
                     +    if (ARCH_ARM) /* each arch init receives c; presumably it overrides the pointers set above - bodies not visible here */
                     +        ff_pixblockdsp_init_arm(c, avctx, high_bit_depth);
                     +    if (ARCH_PPC)
                     +        ff_pixblockdsp_init_ppc(c, avctx, high_bit_depth);
                     +    if (ARCH_X86)
                     +        ff_pixblockdsp_init_x86(c, avctx, high_bit_depth);
                     +}

libavcodec/pixblockdsp.h

History View file @ f46bb60

                     new file mode 100644
@@ -0,0 +1,44 @@
                     +/*
                     + * This file is part of Libav.
                     + *
                     + * Libav is free software; you can redistribute it and/or
                     + * modify it under the terms of the GNU Lesser General Public
                     + * License as published by the Free Software Foundation; either
                     + * version 2.1 of the License, or (at your option) any later version.
                     + *
                     + * Libav is distributed in the hope that it will be useful,
                     + * but WITHOUT ANY WARRANTY; without even the implied warranty of
                     + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     + * Lesser General Public License for more details.
                     + *
                     + * You should have received a copy of the GNU Lesser General Public
                     + * License along with Libav; if not, write to the Free Software
                     + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                     + */
+                    +
                     +#ifndef AVCODEC_PIXBLOCKDSP_H
                     +#define AVCODEC_PIXBLOCKDSP_H
+                    +
                     +#include <stdint.h>
+                    +
                     +#include "avcodec.h"
+                    +
                     +typedef struct PixblockDSPContext { /* function pointers for pixel block routines; both are set by ff_pixblockdsp_init() */
                     +    void (*get_pixels)(int16_t *block /* align 16 */,
                     +                       const uint8_t *pixels /* align 8 */,
                     +                       int line_size);
                     +    void (*diff_pixels)(int16_t *block /* align 16 */,
                     +                        const uint8_t *s1 /* align 8 */,
                     +                        const uint8_t *s2 /* align 8 */,
                     +                        int stride);
                     +} PixblockDSPContext;
+                    +
                     +void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx);
                     +void ff_pixblockdsp_init_arm(PixblockDSPContext *c, AVCodecContext *avctx,
                     +                             unsigned high_bit_depth);
                     +void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, AVCodecContext *avctx,
                     +                             unsigned high_bit_depth);
                     +void ff_pixblockdsp_init_x86(PixblockDSPContext *c, AVCodecContext *avctx,
                     +                             unsigned high_bit_depth);
+                    +
                     +#endif /* AVCODEC_PIXBLOCKDSP_H */

libavcodec/pixblockdsp_template.c

History View file @ f46bb60

                     new file mode 100644
@@ -0,0 +1,40 @@
                     +/*
                     + * This file is part of Libav.
                     + *
                     + * Libav is free software; you can redistribute it and/or
                     + * modify it under the terms of the GNU Lesser General Public
                     + * License as published by the Free Software Foundation; either
                     + * version 2.1 of the License, or (at your option) any later version.
                     + *
                     + * Libav is distributed in the hope that it will be useful,
                     + * but WITHOUT ANY WARRANTY; without even the implied warranty of
                     + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     + * Lesser General Public License for more details.
                     + *
                     + * You should have received a copy of the GNU Lesser General Public
                     + * License along with Libav; if not, write to the Free Software
                     + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                     + */
+                    +
                     +#include "bit_depth_template.c"
+                    +
                     +static void FUNCC(get_pixels)(int16_t *restrict block, const uint8_t *_pixels,
                     +                              int line_size)
                     +{
                     +    const pixel *pixels = (const pixel *) _pixels; /* view the byte pointer as the template's pixel type (from bit_depth_template.c, not shown) */
                     +    int i;
+                    +
                     +    /* copy 8 rows of 8 pixels into block, one int16_t per pixel */
                     +    for (i = 0; i < 8; i++) {
                     +        block[0] = pixels[0];
                     +        block[1] = pixels[1];
                     +        block[2] = pixels[2];
                     +        block[3] = pixels[3];
                     +        block[4] = pixels[4];
                     +        block[5] = pixels[5];
                     +        block[6] = pixels[6];
                     +        block[7] = pixels[7];
                     +        pixels  += line_size / sizeof(pixel); /* line_size is divided by the pixel size to step one row in pixel units */
                     +        block   += 8; /* output rows are stored back to back */
                     +    }
                     +}

libavcodec/ppc/Makefile

History View file @ f46bb60

@@ -15,6 +15,7 @@ OBJS-$(CONFIG_MPEGAUDIODSP)            += ppc/mpegaudiodsp_altivec.o
                      OBJS-$(CONFIG_MPEGVIDEO)               += ppc/mpegvideo_altivec.o      \
                                                                ppc/mpegvideodsp.o
                      OBJS-$(CONFIG_MPEGVIDEOENC)            += ppc/mpegvideoencdsp.o
                     +OBJS-$(CONFIG_PIXBLOCKDSP)             += ppc/pixblockdsp.o
                      OBJS-$(CONFIG_VIDEODSP)                += ppc/videodsp_ppc.o
                      OBJS-$(CONFIG_VP3DSP)                  += ppc/vp3dsp_altivec.o

libavcodec/ppc/dsputil_altivec.c

History View file @ f46bb60

@@ -402,105 +402,6 @@ static int sse16_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                          return s;
+                     }
                     -static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
                     -                               int line_size)
                     -{
                     -    int i;
                     -    vector unsigned char perm = vec_lvsl(0, pixels);
                     -    const vector unsigned char zero =
                     -        (const vector unsigned char) vec_splat_u8(0);
+                    -
                     -    for (i = 0; i < 8; i++) {
                     -        /* Read potentially unaligned pixels.
                     -         * We're reading 16 pixels, and actually only want 8,
                     -         * but we simply ignore the extras. */
                     -        vector unsigned char pixl = vec_ld(0, pixels);
                     -        vector unsigned char pixr = vec_ld(7, pixels);
                     -        vector unsigned char bytes = vec_perm(pixl, pixr, perm);
+                    -
                     -        // Convert the bytes into shorts.
                     -        vector signed short shorts = (vector signed short) vec_mergeh(zero,
                     -                                                                      bytes);
+                    -
                     -        // Save the data to the block, we assume the block is 16-byte aligned.
                     -        vec_st(shorts, i * 16, (vector signed short *) block);
+                    -
                     -        pixels += line_size;
                     -    }
                     -}
+                    -
                     -static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
                     -                                const uint8_t *s2, int stride)
                     -{
                     -    int i;
                     -    vector unsigned char perm1 = vec_lvsl(0, s1);
                     -    vector unsigned char perm2 = vec_lvsl(0, s2);
                     -    const vector unsigned char zero =
                     -        (const vector unsigned char) vec_splat_u8(0);
                     -    vector signed short shorts1, shorts2;
+                    -
                     -    for (i = 0; i < 4; i++) {
                     -        /* Read potentially unaligned pixels.
                     -         * We're reading 16 pixels, and actually only want 8,
                     -         * but we simply ignore the extras. */
                     -        vector unsigned char pixl  = vec_ld(0,  s1);
                     -        vector unsigned char pixr  = vec_ld(15, s1);
                     -        vector unsigned char bytes = vec_perm(pixl, pixr, perm1);
+                    -
                     -        // Convert the bytes into shorts.
                     -        shorts1 = (vector signed short) vec_mergeh(zero, bytes);
+                    -
                     -        // Do the same for the second block of pixels.
                     -        pixl  = vec_ld(0,  s2);
                     -        pixr  = vec_ld(15, s2);
                     -        bytes = vec_perm(pixl, pixr, perm2);
+                    -
                     -        // Convert the bytes into shorts.
                     -        shorts2 = (vector signed short) vec_mergeh(zero, bytes);
+                    -
                     -        // Do the subtraction.
                     -        shorts1 = vec_sub(shorts1, shorts2);
+                    -
                     -        // Save the data to the block, we assume the block is 16-byte aligned.
                     -        vec_st(shorts1, 0, (vector signed short *) block);
+                    -
                     -        s1    += stride;
                     -        s2    += stride;
                     -        block += 8;
+                    -
                     -        /* The code below is a copy of the code above...
                     -         * This is a manual unroll. */
+                    -
                     -        /* Read potentially unaligned pixels.
                     -         * We're reading 16 pixels, and actually only want 8,
                     -         * but we simply ignore the extras. */
                     -        pixl  = vec_ld(0,  s1);
                     -        pixr  = vec_ld(15, s1);
                     -        bytes = vec_perm(pixl, pixr, perm1);
+                    -
                     -        // Convert the bytes into shorts.
                     -        shorts1 = (vector signed short) vec_mergeh(zero, bytes);
+                    -
                     -        // Do the same for the second block of pixels.
                     -        pixl  = vec_ld(0,  s2);
                     -        pixr  = vec_ld(15, s2);
                     -        bytes = vec_perm(pixl, pixr, perm2);
+                    -
                     -        // Convert the bytes into shorts.
                     -        shorts2 = (vector signed short) vec_mergeh(zero, bytes);
+                    -
                     -        // Do the subtraction.
                     -        shorts1 = vec_sub(shorts1, shorts2);
+                    -
                     -        // Save the data to the block, we assume the block is 16-byte aligned.
                     -        vec_st(shorts1, 0, (vector signed short *) block);
+                    -
                     -        s1    += stride;
                     -        s2    += stride;
                     -        block += 8;
                     -    }
                     -}
+                    -
                      static int hadamard8_diff8x8_altivec(MpegEncContext *s, uint8_t *dst,
                                                           uint8_t *src, int stride, int h)
+                     {
@@ -854,12 +755,6 @@ av_cold void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx,
                          c->sse[0] = sse16_altivec;
                          c->sse[1] = sse8_altivec;
                     -    c->diff_pixels = diff_pixels_altivec;
+                    -
                     -    if (!high_bit_depth) {
                     -        c->get_pixels = get_pixels_altivec;
                     -    }
+                    -
                          c->hadamard8_diff[0] = hadamard8_diff16_altivec;
                          c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;
+                     }

libavcodec/ppc/pixblockdsp.c

History View file @ f46bb60

                     new file mode 100644
@@ -0,0 +1,153 @@
                     +/*
                     + * Copyright (c) 2002 Brian Foley
                     + * Copyright (c) 2002 Dieter Shirley
                     + * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
                     + *
                     + * This file is part of Libav.
                     + *
                     + * Libav is free software; you can redistribute it and/or
                     + * modify it under the terms of the GNU Lesser General Public
                     + * License as published by the Free Software Foundation; either
                     + * version 2.1 of the License, or (at your option) any later version.
                     + *
                     + * Libav is distributed in the hope that it will be useful,
                     + * but WITHOUT ANY WARRANTY; without even the implied warranty of
                     + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     + * Lesser General Public License for more details.
                     + *
                     + * You should have received a copy of the GNU Lesser General Public
                     + * License along with Libav; if not, write to the Free Software
                     + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                     + */
+                    +
                     +#include "config.h"
                     +#if HAVE_ALTIVEC_H
                     +#include <altivec.h>
                     +#endif
+                    +
                     +#include "libavutil/attributes.h"
                     +#include "libavutil/cpu.h"
                     +#include "libavutil/ppc/cpu.h"
                     +#include "libavutil/ppc/types_altivec.h"
                     +#include "libavutil/ppc/util_altivec.h"
                     +#include "libavcodec/avcodec.h"
                     +#include "libavcodec/pixblockdsp.h"
+                    +
                     +#if HAVE_ALTIVEC
+                    +
                     +/**
                     + * Widen an 8x8 block of 8-bit pixels into 16-bit values.
                     + *
                     + * @param block     destination for 8 rows of 8 int16_t values; it is
                     + *                  written with vec_st and therefore assumed to be
                     + *                  16-byte aligned
                     + * @param pixels    first source pixel of the block, may be unaligned
                     + * @param line_size distance in bytes between two source rows
                     + */
                     +static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
                     +                               int line_size)
                     +{
                     +    int i;
                     +    const vector unsigned char zero =
                     +        (const vector unsigned char) vec_splat_u8(0);
+                    +
                     +    for (i = 0; i < 8; i++) {
                     +        /* The permute mask encodes the alignment of the current row.
                     +         * Derive it per row instead of once up front: line_size is not
                     +         * guaranteed to be a multiple of 16, so the alignment may differ
                     +         * from one row to the next. */
                     +        vector unsigned char perm = vec_lvsl(0, pixels);
+                    +
                     +        /* Read potentially unaligned pixels.
                     +         * We're reading 16 pixels, and actually only want 8,
                     +         * but we simply ignore the extras. */
                     +        vector unsigned char pixl = vec_ld(0, pixels);
                     +        vector unsigned char pixr = vec_ld(7, pixels);
                     +        vector unsigned char bytes = vec_perm(pixl, pixr, perm);
+                    +
                     +        // Convert the bytes into shorts.
                     +        vector signed short shorts = (vector signed short) vec_mergeh(zero,
                     +                                                                      bytes);
+                    +
                     +        // Save the data to the block, we assume the block is 16-byte aligned.
                     +        vec_st(shorts, i * 16, (vector signed short *) block);
+                    +
                     +        pixels += line_size;
                     +    }
                     +}
+                    +
                     +/**
                     + * Compute the difference s1 - s2 of two 8x8 blocks of 8-bit pixels and
                     + * store it as 16-bit values.
                     + *
                     + * @param block  destination for 8 rows of 8 int16_t values; it is written
                     + *               with vec_st and therefore assumed to be 16-byte aligned
                     + * @param s1     first source block (minuend), may be unaligned
                     + * @param s2     second source block (subtrahend), may be unaligned
                     + * @param stride distance in bytes between two rows, shared by s1 and s2
                     + */
                     +static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
                     +                                const uint8_t *s2, int stride)
                     +{
                     +    int i;
                     +    const vector unsigned char zero =
                     +        (const vector unsigned char) vec_splat_u8(0);
                     +    vector signed short shorts1, shorts2;
+                    +
                     +    for (i = 0; i < 4; i++) {
                     +        /* The permute masks encode the alignment of the current rows.
                     +         * Derive them per row instead of once up front: stride is not
                     +         * guaranteed to be a multiple of 16, so the alignment of s1 and
                     +         * s2 may differ from one row to the next. */
                     +        vector unsigned char perm1 = vec_lvsl(0, s1);
                     +        vector unsigned char perm2 = vec_lvsl(0, s2);
+                    +
                     +        /* Read potentially unaligned pixels.
                     +         * We're reading 16 pixels, and actually only want 8,
                     +         * but we simply ignore the extras. */
                     +        vector unsigned char pixl  = vec_ld(0,  s1);
                     +        vector unsigned char pixr  = vec_ld(15, s1);
                     +        vector unsigned char bytes = vec_perm(pixl, pixr, perm1);
+                    +
                     +        // Convert the bytes into shorts.
                     +        shorts1 = (vector signed short) vec_mergeh(zero, bytes);
+                    +
                     +        // Do the same for the second block of pixels.
                     +        pixl  = vec_ld(0,  s2);
                     +        pixr  = vec_ld(15, s2);
                     +        bytes = vec_perm(pixl, pixr, perm2);
+                    +
                     +        // Convert the bytes into shorts.
                     +        shorts2 = (vector signed short) vec_mergeh(zero, bytes);
+                    +
                     +        // Do the subtraction.
                     +        shorts1 = vec_sub(shorts1, shorts2);
+                    +
                     +        // Save the data to the block, we assume the block is 16-byte aligned.
                     +        vec_st(shorts1, 0, (vector signed short *) block);
+                    +
                     +        s1    += stride;
                     +        s2    += stride;
                     +        block += 8;
+                    +
                     +        /* The code below is a copy of the code above...
                     +         * This is a manual unroll. */
+                    +
                     +        // Refresh the permute masks for the next pair of rows.
                     +        perm1 = vec_lvsl(0, s1);
                     +        perm2 = vec_lvsl(0, s2);
+                    +
                     +        /* Read potentially unaligned pixels.
                     +         * We're reading 16 pixels, and actually only want 8,
                     +         * but we simply ignore the extras. */
                     +        pixl  = vec_ld(0,  s1);
                     +        pixr  = vec_ld(15, s1);
                     +        bytes = vec_perm(pixl, pixr, perm1);
+                    +
                     +        // Convert the bytes into shorts.
                     +        shorts1 = (vector signed short) vec_mergeh(zero, bytes);
+                    +
                     +        // Do the same for the second block of pixels.
                     +        pixl  = vec_ld(0,  s2);
                     +        pixr  = vec_ld(15, s2);
                     +        bytes = vec_perm(pixl, pixr, perm2);
+                    +
                     +        // Convert the bytes into shorts.
                     +        shorts2 = (vector signed short) vec_mergeh(zero, bytes);
+                    +
                     +        // Do the subtraction.
                     +        shorts1 = vec_sub(shorts1, shorts2);
+                    +
                     +        // Save the data to the block, we assume the block is 16-byte aligned.
                     +        vec_st(shorts1, 0, (vector signed short *) block);
+                    +
                     +        s1    += stride;
                     +        s2    += stride;
                     +        block += 8;
                     +    }
                     +}
+                    +
                     +#endif /* HAVE_ALTIVEC */
+                    +
                     +/**
                     + * Install the AltiVec pixel block routines into the context.
                     + *
                     + * Nothing is changed unless the build has AltiVec enabled and the running
                     + * CPU reports AltiVec support.
                     + *
                     + * @param c              context whose function pointers are updated
                     + * @param avctx          not used by this function
                     + * @param high_bit_depth when non-zero, get_pixels is left untouched
                     + */
                     +av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c,
                     +                                     AVCodecContext *avctx,
                     +                                     unsigned high_bit_depth)
                     +{
                     +#if HAVE_ALTIVEC
                     +    if (PPC_ALTIVEC(av_get_cpu_flags())) {
                     +        c->diff_pixels = diff_pixels_altivec;
+                    +
                     +        if (!high_bit_depth)
                     +            c->get_pixels = get_pixels_altivec;
                     +    }
                     +#endif /* HAVE_ALTIVEC */
                     +}

libavcodec/x86/Makefile

History View file @ f46bb60

@@ -26,6 +26,7 @@ OBJS-$(CONFIG_MPEGVIDEO)               += x86/mpegvideo.o              \
                                                                x86/mpegvideodsp.o
                      OBJS-$(CONFIG_MPEGVIDEOENC)            += x86/mpegvideoenc.o           \
                                                                x86/mpegvideoencdsp_init.o
                     +OBJS-$(CONFIG_PIXBLOCKDSP)             += x86/pixblockdsp_init.o
                      OBJS-$(CONFIG_QPELDSP)                 += x86/qpeldsp_init.o
                      OBJS-$(CONFIG_VIDEODSP)                += x86/videodsp_init.o
                      OBJS-$(CONFIG_VP3DSP)                  += x86/vp3dsp_init.o
@@ -93,6 +94,7 @@ YASM-OBJS-$(CONFIG_HPELDSP)            += x86/fpel.o                    \
                      YASM-OBJS-$(CONFIG_HUFFYUVDSP)         += x86/huffyuvdsp.o
                      YASM-OBJS-$(CONFIG_MPEGAUDIODSP)       += x86/imdct36.o
                      YASM-OBJS-$(CONFIG_MPEGVIDEOENC)       += x86/mpegvideoencdsp.o
                     +YASM-OBJS-$(CONFIG_PIXBLOCKDSP)        += x86/pixblockdsp.o
                      YASM-OBJS-$(CONFIG_QPELDSP)            += x86/qpeldsp.o                 \
                                                                x86/fpel.o                    \
                                                                x86/qpel.o

libavcodec/x86/dsputilenc.asm

History View file @ f46bb60

@@ -334,87 +334,3 @@ cglobal sse16, 5, 5, 8
                          paddd     m7, m1
                          movd     eax, m7         ; return value
                          RET
+                    -
                     -INIT_MMX mmx
                     -; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size)
                     -cglobal get_pixels, 3,4
                     -    movsxdifnidn r2, r2d
                     -    add          r0, 128
                     -    mov          r3, -128
                     -    pxor         m7, m7
                     -.loop:
                     -    mova         m0, [r1]
                     -    mova         m2, [r1+r2]
                     -    mova         m1, m0
                     -    mova         m3, m2
                     -    punpcklbw    m0, m7
                     -    punpckhbw    m1, m7
                     -    punpcklbw    m2, m7
                     -    punpckhbw    m3, m7
                     -    mova [r0+r3+ 0], m0
                     -    mova [r0+r3+ 8], m1
                     -    mova [r0+r3+16], m2
                     -    mova [r0+r3+24], m3
                     -    lea          r1, [r1+r2*2]
                     -    add          r3, 32
                     -    js .loop
                     -    REP_RET
+                    -
                     -INIT_XMM sse2
                     -cglobal get_pixels, 3, 4
                     -    movsxdifnidn r2, r2d
                     -    lea          r3, [r2*3]
                     -    pxor         m4, m4
                     -    movh         m0, [r1]
                     -    movh         m1, [r1+r2]
                     -    movh         m2, [r1+r2*2]
                     -    movh         m3, [r1+r3]
                     -    lea          r1, [r1+r2*4]
                     -    punpcklbw    m0, m4
                     -    punpcklbw    m1, m4
                     -    punpcklbw    m2, m4
                     -    punpcklbw    m3, m4
                     -    mova       [r0], m0
                     -    mova  [r0+0x10], m1
                     -    mova  [r0+0x20], m2
                     -    mova  [r0+0x30], m3
                     -    movh         m0, [r1]
                     -    movh         m1, [r1+r2*1]
                     -    movh         m2, [r1+r2*2]
                     -    movh         m3, [r1+r3]
                     -    punpcklbw    m0, m4
                     -    punpcklbw    m1, m4
                     -    punpcklbw    m2, m4
                     -    punpcklbw    m3, m4
                     -    mova  [r0+0x40], m0
                     -    mova  [r0+0x50], m1
                     -    mova  [r0+0x60], m2
                     -    mova  [r0+0x70], m3
                     -    RET
+                    -
                     -INIT_MMX mmx
                     -; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
                     -;                         int stride);
                     -cglobal diff_pixels, 4,5
                     -    movsxdifnidn r3, r3d
                     -    pxor         m7, m7
                     -    add          r0,  128
                     -    mov          r4, -128
                     -.loop:
                     -    mova         m0, [r1]
                     -    mova         m2, [r2]
                     -    mova         m1, m0
                     -    mova         m3, m2
                     -    punpcklbw    m0, m7
                     -    punpckhbw    m1, m7
                     -    punpcklbw    m2, m7
                     -    punpckhbw    m3, m7
                     -    psubw        m0, m2
                     -    psubw        m1, m3
                     -    mova  [r0+r4+0], m0
                     -    mova  [r0+r4+8], m1
                     -    add          r1, r3
                     -    add          r2, r3
                     -    add          r4, 16
                     -    jne .loop
                     -    REP_RET

libavcodec/x86/dsputilenc_mmx.c

History View file @ f46bb60

@@ -30,11 +30,6 @@
                      #include "libavcodec/mpegvideo.h"
                      #include "dsputil_x86.h"
                     -void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size);
                     -void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size);
                     -void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
                     -                        int stride);
+                    -
                      #if HAVE_INLINE_ASM
                      static int sse8_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
@@ -823,16 +818,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
+                     {
                          int cpu_flags = av_get_cpu_flags();
                     -    if (EXTERNAL_MMX(cpu_flags)) {
                     -        if (!high_bit_depth)
                     -            c->get_pixels = ff_get_pixels_mmx;
                     -        c->diff_pixels = ff_diff_pixels_mmx;
                     -    }
+                    -
                     -    if (EXTERNAL_SSE2(cpu_flags))
                     -        if (!high_bit_depth)
                     -            c->get_pixels = ff_get_pixels_sse2;
+                    -
                      #if HAVE_INLINE_ASM
                          if (INLINE_MMX(cpu_flags)) {
                              c->sum_abs_dctelem = sum_abs_dctelem_mmx;

libavcodec/x86/pixblockdsp.asm

History View file @ f46bb60

                     new file mode 100644
@@ -0,0 +1,110 @@
                     +;*****************************************************************************
                     +;* SIMD-optimized pixel operations
                     +;*****************************************************************************
                     +;* Copyright (c) 2000, 2001 Fabrice Bellard
                     +;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
                     +;*
                     +;* This file is part of Libav.
                     +;*
                     +;* Libav is free software; you can redistribute it and/or
                     +;* modify it under the terms of the GNU Lesser General Public
                     +;* License as published by the Free Software Foundation; either
                     +;* version 2.1 of the License, or (at your option) any later version.
                     +;*
                     +;* Libav is distributed in the hope that it will be useful,
                     +;* but WITHOUT ANY WARRANTY; without even the implied warranty of
                     +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     +;* Lesser General Public License for more details.
                     +;*
                     +;* You should have received a copy of the GNU Lesser General Public
                     +;* License along with Libav; if not, write to the Free Software
                     +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                     +;*****************************************************************************
+                    +
                     +%include "libavutil/x86/x86util.asm"
+                    +
                     +SECTION .text
+                    +
                     +INIT_MMX mmx
                     +; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size)
                     +;
                     +; Widens 8 rows of 8 source bytes into 16-bit words and stores them
                     +; contiguously in block (128 bytes in total).
                     +; Register use, following the argument order above:
                     +;   r0 = block, r1 = pixels, r2 = line_size, r3 = running store offset.
                     +cglobal get_pixels, 3,4
                     +    ; NOTE(review): movsxdifnidn is an x86inc macro; presumably it
                     +    ; sign-extends the 32-bit line_size to pointer width where needed.
                     +    movsxdifnidn r2, r2d
                     +    ; Point r0 at the end of the 128-byte block and index it with a
                     +    ; negative offset that counts up to zero, so the offset update also
                     +    ; provides the loop condition.
                     +    add          r0, 128
                     +    mov          r3, -128
                     +    ; m7 = 0, used as the high bytes when widening.
                     +    pxor         m7, m7
                     +.loop:
                     +    ; Each iteration handles two rows: m0 = row n, m2 = row n+1.
                     +    mova         m0, [r1]
                     +    mova         m2, [r1+r2]
                     +    mova         m1, m0
                     +    mova         m3, m2
                     +    ; Interleave with zero: low four bytes -> m0/m2, high four -> m1/m3.
                     +    punpcklbw    m0, m7
                     +    punpckhbw    m1, m7
                     +    punpcklbw    m2, m7
                     +    punpckhbw    m3, m7
                     +    ; Store 2 rows x 8 words = 32 bytes.
                     +    mova [r0+r3+ 0], m0
                     +    mova [r0+r3+ 8], m1
                     +    mova [r0+r3+16], m2
                     +    mova [r0+r3+24], m3
                     +    ; Advance the source by two rows and the destination by 32 bytes.
                     +    lea          r1, [r1+r2*2]
                     +    add          r3, 32
                     +    ; Loop while the offset is still negative (4 iterations in total).
                     +    js .loop
                     +    REP_RET
+                    +
                     +INIT_XMM sse2
                     +; void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size)
                     +;
                     +; Fully unrolled variant: widens 8 rows of 8 source bytes into 16-bit
                     +; words, four rows at a time, and stores one 16-byte row per register.
                     +; Register use, following the argument order above:
                     +;   r0 = block, r1 = pixels, r2 = line_size, r3 = 3 * line_size.
                     +; NOTE(review): the stores use mova, so block presumably has to be
                     +; 16-byte aligned -- confirm against the callers.
                     +cglobal get_pixels, 3, 4
                     +    ; NOTE(review): movsxdifnidn is an x86inc macro; presumably it
                     +    ; sign-extends the 32-bit line_size to pointer width where needed.
                     +    movsxdifnidn r2, r2d
                     +    ; r3 = 3 * line_size, so rows 0..3 are addressable without updating r1.
                     +    lea          r3, [r2*3]
                     +    ; m4 = 0, used as the high bytes when widening.
                     +    pxor         m4, m4
                     +    ; Load rows 0..3 (movh presumably loads only the low 8 bytes).
                     +    movh         m0, [r1]
                     +    movh         m1, [r1+r2]
                     +    movh         m2, [r1+r2*2]
                     +    movh         m3, [r1+r3]
                     +    ; Move the source pointer on to row 4.
                     +    lea          r1, [r1+r2*4]
                     +    ; Interleave the low 8 bytes with zero to obtain 8 words per row.
                     +    punpcklbw    m0, m4
                     +    punpcklbw    m1, m4
                     +    punpcklbw    m2, m4
                     +    punpcklbw    m3, m4
                     +    ; Store rows 0..3, 16 bytes each.
                     +    mova       [r0], m0
                     +    mova  [r0+0x10], m1
                     +    mova  [r0+0x20], m2
                     +    mova  [r0+0x30], m3
                     +    ; Same again for rows 4..7.
                     +    movh         m0, [r1]
                     +    movh         m1, [r1+r2*1]
                     +    movh         m2, [r1+r2*2]
                     +    movh         m3, [r1+r3]
                     +    punpcklbw    m0, m4
                     +    punpcklbw    m1, m4
                     +    punpcklbw    m2, m4
                     +    punpcklbw    m3, m4
                     +    mova  [r0+0x40], m0
                     +    mova  [r0+0x50], m1
                     +    mova  [r0+0x60], m2
                     +    mova  [r0+0x70], m3
                     +    RET
+                    +
                     +INIT_MMX mmx
                     +; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
                     +;                         int stride);
                     +;
                     +; Stores the word-wise difference s1 - s2 of 8 rows of 8 bytes in block
                     +; (128 bytes in total).
                     +; Register use, following the argument order above:
                     +;   r0 = block, r1 = s1, r2 = s2, r3 = stride, r4 = running store offset.
                     +cglobal diff_pixels, 4,5
                     +    ; NOTE(review): movsxdifnidn is an x86inc macro; presumably it
                     +    ; sign-extends the 32-bit stride to pointer width where needed.
                     +    movsxdifnidn r3, r3d
                     +    ; m7 = 0, used as the high bytes when widening.
                     +    pxor         m7, m7
                     +    ; Point r0 at the end of the 128-byte block and index it with a
                     +    ; negative offset that counts up to zero.
                     +    add          r0,  128
                     +    mov          r4, -128
                     +.loop:
                     +    ; One row per iteration: m0 = 8 bytes of s1, m2 = 8 bytes of s2.
                     +    mova         m0, [r1]
                     +    mova         m2, [r2]
                     +    mova         m1, m0
                     +    mova         m3, m2
                     +    ; Interleave with zero: low four bytes -> m0/m2, high four -> m1/m3.
                     +    punpcklbw    m0, m7
                     +    punpckhbw    m1, m7
                     +    punpcklbw    m2, m7
                     +    punpckhbw    m3, m7
                     +    ; Word-wise s1 - s2.
                     +    psubw        m0, m2
                     +    psubw        m1, m3
                     +    ; Store the row of 8 words (16 bytes).
                     +    mova  [r0+r4+0], m0
                     +    mova  [r0+r4+8], m1
                     +    ; Advance both sources by one row and the destination by 16 bytes.
                     +    add          r1, r3
                     +    add          r2, r3
                     +    add          r4, 16
                     +    ; Loop until the offset reaches zero (8 iterations in total).
                     +    jne .loop
                     +    REP_RET

libavcodec/x86/pixblockdsp_init.c

History View file @ f46bb60

                     new file mode 100644
@@ -0,0 +1,47 @@
                     +/*
                     + * SIMD-optimized pixel operations
                     + *
                     + * This file is part of Libav.
                     + *
                     + * Libav is free software; you can redistribute it and/or
                     + * modify it under the terms of the GNU Lesser General Public
                     + * License as published by the Free Software Foundation; either
                     + * version 2.1 of the License, or (at your option) any later version.
                     + *
                     + * Libav is distributed in the hope that it will be useful,
                     + * but WITHOUT ANY WARRANTY; without even the implied warranty of
                     + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     + * Lesser General Public License for more details.
                     + *
                     + * You should have received a copy of the GNU Lesser General Public
                     + * License along with Libav; if not, write to the Free Software
                     + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                     + */
+                    +
                     +#include "libavutil/attributes.h"
                     +#include "libavutil/cpu.h"
                     +#include "libavutil/x86/cpu.h"
                     +#include "libavcodec/pixblockdsp.h"
+                    +
                     +void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size);
                     +void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size);
                     +void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
                     +                        int stride);
+                    +
                     +/**
                     + * Install the x86 SIMD pixel block routines supported by the running CPU.
                     + *
                     + * @param c              context whose function pointers are updated
                     + * @param avctx          not used by this function
                     + * @param high_bit_depth when non-zero, get_pixels is left untouched
                     + */
                     +av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c,
                     +                                     AVCodecContext *avctx,
                     +                                     unsigned high_bit_depth)
                     +{
                     +    const int flags = av_get_cpu_flags();
+                    +
                     +    if (EXTERNAL_MMX(flags)) {
                     +        c->diff_pixels = ff_diff_pixels_mmx;
                     +        if (!high_bit_depth)
                     +            c->get_pixels = ff_get_pixels_mmx;
                     +    }
+                    +
                     +    /* Checked after MMX so that the SSE2 get_pixels takes precedence. */
                     +    if (EXTERNAL_SSE2(flags) && !high_bit_depth)
                     +        c->get_pixels = ff_get_pixels_sse2;
                     +}