Browse code

dsputil: Split off pixel block routines into their own context

Diego Biurrun authored on 2014/02/04 07:29:09
Showing 27 changed files
... ...
@@ -1558,6 +1558,7 @@ CONFIG_EXTRA="
1558 1558
     mpegvideo
1559 1559
     mpegvideoenc
1560 1560
     nettle
1561
+    pixblockdsp
1561 1562
     qpeldsp
1562 1563
     rangecoder
1563 1564
     riffdec
... ...
@@ -1706,7 +1707,7 @@ threads_if_any="$THREADS_LIST"
1706 1706
 
1707 1707
 # subsystems
1708 1708
 dct_select="rdft"
1709
-dsputil_select="fdctdsp idctdsp"
1709
+dsputil_select="fdctdsp idctdsp pixblockdsp"
1710 1710
 error_resilience_select="dsputil"
1711 1711
 intrax8_select="error_resilience"
1712 1712
 mdct_select="fft"
... ...
@@ -1715,7 +1716,7 @@ mpeg_er_select="error_resilience"
1715 1715
 mpegaudio_select="mpegaudiodsp"
1716 1716
 mpegaudiodsp_select="dct"
1717 1717
 mpegvideo_select="blockdsp dsputil hpeldsp idctdsp videodsp"
1718
-mpegvideoenc_select="dsputil mpegvideo qpeldsp"
1718
+mpegvideoenc_select="dsputil mpegvideo pixblockdsp qpeldsp"
1719 1719
 
1720 1720
 # decoders / encoders
1721 1721
 aac_decoder_select="mdct sinewin"
... ...
@@ -1732,9 +1733,9 @@ amrwb_decoder_select="lsp"
1732 1732
 amv_decoder_select="sp5x_decoder"
1733 1733
 ape_decoder_select="bswapdsp"
1734 1734
 asv1_decoder_select="blockdsp bswapdsp idctdsp"
1735
-asv1_encoder_select="bswapdsp dsputil fdctdsp"
1735
+asv1_encoder_select="bswapdsp fdctdsp pixblockdsp"
1736 1736
 asv2_decoder_select="blockdsp bswapdsp idctdsp"
1737
-asv2_encoder_select="bswapdsp dsputil fdctdsp"
1737
+asv2_encoder_select="bswapdsp fdctdsp pixblockdsp"
1738 1738
 atrac1_decoder_select="mdct sinewin"
1739 1739
 atrac3_decoder_select="mdct"
1740 1740
 atrac3p_decoder_select="mdct sinewin"
... ...
@@ -1749,9 +1750,9 @@ cscd_decoder_select="lzo"
1749 1749
 cscd_decoder_suggest="zlib"
1750 1750
 dca_decoder_select="mdct"
1751 1751
 dnxhd_decoder_select="blockdsp idctdsp"
1752
-dnxhd_encoder_select="aandcttables blockdsp dsputil fdctdsp idctdsp mpegvideoenc"
1752
+dnxhd_encoder_select="aandcttables blockdsp fdctdsp idctdsp mpegvideoenc pixblockdsp"
1753 1753
 dvvideo_decoder_select="dvprofile idctdsp"
1754
-dvvideo_encoder_select="dsputil dvprofile fdctdsp"
1754
+dvvideo_encoder_select="dsputil dvprofile fdctdsp pixblockdsp"
1755 1755
 dxa_decoder_deps="zlib"
1756 1756
 eac3_decoder_select="ac3_decoder"
1757 1757
 eac3_encoder_select="ac3_encoder"
... ...
@@ -72,6 +72,7 @@ OBJS-$(CONFIG_MPEGVIDEO)               += mpegvideo.o mpegvideodsp.o    \
72 72
 OBJS-$(CONFIG_MPEGVIDEOENC)            += mpegvideo_enc.o mpeg12data.o  \
73 73
                                           motion_est.o ratecontrol.o    \
74 74
                                           mpegvideoencdsp.o
75
+OBJS-$(CONFIG_PIXBLOCKDSP)             += pixblockdsp.o
75 76
 OBJS-$(CONFIG_QPELDSP)                 += qpeldsp.o
76 77
 OBJS-$(CONFIG_RANGECODER)              += rangecoder.o
77 78
 RDFT-OBJS-$(CONFIG_HARDCODED_TABLES)   += sin_tables.o
... ...
@@ -23,6 +23,7 @@ OBJS-$(CONFIG_MPEGAUDIODSP)            += arm/mpegaudiodsp_init_arm.o
23 23
 OBJS-$(CONFIG_MPEGVIDEO)               += arm/mpegvideo_arm.o
24 24
 OBJS-$(CONFIG_MPEGVIDEOENC)            += arm/mpegvideoencdsp_init_arm.o
25 25
 OBJS-$(CONFIG_NEON_CLOBBER_TEST)       += arm/neontest.o
26
+OBJS-$(CONFIG_PIXBLOCKDSP)             += arm/pixblockdsp_init_arm.o
26 27
 OBJS-$(CONFIG_VIDEODSP)                += arm/videodsp_init_arm.o
27 28
 OBJS-$(CONFIG_VP3DSP)                  += arm/vp3dsp_init_arm.o
28 29
 
... ...
@@ -62,6 +63,7 @@ ARMV6-OBJS-$(CONFIG_IDCTDSP)           += arm/idctdsp_init_armv6.o      \
62 62
                                           arm/simple_idct_armv6.o
63 63
 ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP)      += arm/mpegaudiodsp_fixed_armv6.o
64 64
 ARMV6-OBJS-$(CONFIG_MPEGVIDEOENC)      += arm/mpegvideoencdsp_armv6.o
65
+ARMV6-OBJS-$(CONFIG_PIXBLOCKDSP)       += arm/pixblockdsp_armv6.o
65 66
 
66 67
 ARMV6-OBJS-$(CONFIG_MLP_DECODER)       += arm/mlpdsp_armv6.o
67 68
 ARMV6-OBJS-$(CONFIG_VP7_DECODER)       += arm/vp8_armv6.o               \
... ...
@@ -20,61 +20,6 @@
20 20
 
21 21
 #include "libavutil/arm/asm.S"
22 22
 
23
-function ff_get_pixels_armv6, export=1
24
-        pld             [r1, r2]
25
-        push            {r4-r8, lr}
26
-        mov             lr,  #8
27
-1:
28
-        ldrd_post       r4,  r5,  r1,  r2
29
-        subs            lr,  lr,  #1
30
-        uxtb16          r6,  r4
31
-        uxtb16          r4,  r4,  ror #8
32
-        uxtb16          r12, r5
33
-        uxtb16          r8,  r5,  ror #8
34
-        pld             [r1, r2]
35
-        pkhbt           r5,  r6,  r4,  lsl #16
36
-        pkhtb           r6,  r4,  r6,  asr #16
37
-        pkhbt           r7,  r12, r8,  lsl #16
38
-        pkhtb           r12, r8,  r12, asr #16
39
-        stm             r0!, {r5,r6,r7,r12}
40
-        bgt             1b
41
-
42
-        pop             {r4-r8, pc}
43
-endfunc
44
-
45
-function ff_diff_pixels_armv6, export=1
46
-        pld             [r1, r3]
47
-        pld             [r2, r3]
48
-        push            {r4-r9, lr}
49
-        mov             lr,  #8
50
-1:
51
-        ldrd_post       r4,  r5,  r1,  r3
52
-        ldrd_post       r6,  r7,  r2,  r3
53
-        uxtb16          r8,  r4
54
-        uxtb16          r4,  r4,  ror #8
55
-        uxtb16          r9,  r6
56
-        uxtb16          r6,  r6,  ror #8
57
-        pld             [r1, r3]
58
-        ssub16          r9,  r8,  r9
59
-        ssub16          r6,  r4,  r6
60
-        uxtb16          r8,  r5
61
-        uxtb16          r5,  r5,  ror #8
62
-        pld             [r2, r3]
63
-        pkhbt           r4,  r9,  r6,  lsl #16
64
-        pkhtb           r6,  r6,  r9,  asr #16
65
-        uxtb16          r9,  r7
66
-        uxtb16          r7,  r7,  ror #8
67
-        ssub16          r9,  r8,  r9
68
-        ssub16          r5,  r5,  r7
69
-        subs            lr,  lr,  #1
70
-        pkhbt           r8,  r9,  r5,  lsl #16
71
-        pkhtb           r9,  r5,  r9,  asr #16
72
-        stm             r0!, {r4,r6,r8,r9}
73
-        bgt             1b
74
-
75
-        pop             {r4-r9, pc}
76
-endfunc
77
-
78 23
 function ff_pix_abs16_armv6, export=1
79 24
         ldr             r0,  [sp]
80 25
         push            {r4-r9, lr}
... ...
@@ -26,10 +26,6 @@
26 26
 #include "libavcodec/mpegvideo.h"
27 27
 #include "dsputil_arm.h"
28 28
 
29
-void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, int stride);
30
-void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1,
31
-                          const uint8_t *s2, int stride);
32
-
33 29
 int ff_pix_abs16_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2,
34 30
                        int line_size, int h);
35 31
 int ff_pix_abs16_x2_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2,
... ...
@@ -46,10 +42,6 @@ int ff_sse16_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2,
46 46
 av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx,
47 47
                                    unsigned high_bit_depth)
48 48
 {
49
-    if (!high_bit_depth)
50
-        c->get_pixels = ff_get_pixels_armv6;
51
-    c->diff_pixels = ff_diff_pixels_armv6;
52
-
53 49
     c->pix_abs[0][0] = ff_pix_abs16_armv6;
54 50
     c->pix_abs[0][1] = ff_pix_abs16_x2_armv6;
55 51
     c->pix_abs[0][2] = ff_pix_abs16_y2_armv6;
56 52
new file mode 100644
... ...
@@ -0,0 +1,76 @@
0
+/*
1
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
2
+ *
3
+ * This file is part of Libav.
4
+ *
5
+ * Libav is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * Libav is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with Libav; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#include "libavutil/arm/asm.S"
21
+
22
+function ff_get_pixels_armv6, export=1
23
+        pld             [r1, r2]
24
+        push            {r4-r8, lr}
25
+        mov             lr,  #8
26
+1:
27
+        ldrd_post       r4,  r5,  r1,  r2
28
+        subs            lr,  lr,  #1
29
+        uxtb16          r6,  r4
30
+        uxtb16          r4,  r4,  ror #8
31
+        uxtb16          r12, r5
32
+        uxtb16          r8,  r5,  ror #8
33
+        pld             [r1, r2]
34
+        pkhbt           r5,  r6,  r4,  lsl #16
35
+        pkhtb           r6,  r4,  r6,  asr #16
36
+        pkhbt           r7,  r12, r8,  lsl #16
37
+        pkhtb           r12, r8,  r12, asr #16
38
+        stm             r0!, {r5,r6,r7,r12}
39
+        bgt             1b
40
+
41
+        pop             {r4-r8, pc}
42
+endfunc
43
+
44
+function ff_diff_pixels_armv6, export=1
45
+        pld             [r1, r3]
46
+        pld             [r2, r3]
47
+        push            {r4-r9, lr}
48
+        mov             lr,  #8
49
+1:
50
+        ldrd_post       r4,  r5,  r1,  r3
51
+        ldrd_post       r6,  r7,  r2,  r3
52
+        uxtb16          r8,  r4
53
+        uxtb16          r4,  r4,  ror #8
54
+        uxtb16          r9,  r6
55
+        uxtb16          r6,  r6,  ror #8
56
+        pld             [r1, r3]
57
+        ssub16          r9,  r8,  r9
58
+        ssub16          r6,  r4,  r6
59
+        uxtb16          r8,  r5
60
+        uxtb16          r5,  r5,  ror #8
61
+        pld             [r2, r3]
62
+        pkhbt           r4,  r9,  r6,  lsl #16
63
+        pkhtb           r6,  r6,  r9,  asr #16
64
+        uxtb16          r9,  r7
65
+        uxtb16          r7,  r7,  ror #8
66
+        ssub16          r9,  r8,  r9
67
+        ssub16          r5,  r5,  r7
68
+        subs            lr,  lr,  #1
69
+        pkhbt           r8,  r9,  r5,  lsl #16
70
+        pkhtb           r9,  r5,  r9,  asr #16
71
+        stm             r0!, {r4,r6,r8,r9}
72
+        bgt             1b
73
+
74
+        pop             {r4-r9, pc}
75
+endfunc
0 76
new file mode 100644
... ...
@@ -0,0 +1,42 @@
0
+/*
1
+ * This file is part of Libav.
2
+ *
3
+ * Libav is free software; you can redistribute it and/or
4
+ * modify it under the terms of the GNU Lesser General Public
5
+ * License as published by the Free Software Foundation; either
6
+ * version 2.1 of the License, or (at your option) any later version.
7
+ *
8
+ * Libav is distributed in the hope that it will be useful,
9
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
+ * Lesser General Public License for more details.
12
+ *
13
+ * You should have received a copy of the GNU Lesser General Public
14
+ * License along with Libav; if not, write to the Free Software
15
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+ */
17
+
18
+#include <stdint.h>
19
+
20
+#include "libavutil/attributes.h"
21
+#include "libavutil/cpu.h"
22
+#include "libavutil/arm/cpu.h"
23
+#include "libavcodec/avcodec.h"
24
+#include "libavcodec/pixblockdsp.h"
25
+
26
+void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, int stride);
27
+void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1,
28
+                          const uint8_t *s2, int stride);
29
+
30
+av_cold void ff_pixblockdsp_init_arm(PixblockDSPContext *c,
31
+                                     AVCodecContext *avctx,
32
+                                     unsigned high_bit_depth)
33
+{
34
+    int cpu_flags = av_get_cpu_flags();
35
+
36
+    if (have_armv6(cpu_flags)) {
37
+        if (!high_bit_depth)
38
+            c->get_pixels = ff_get_pixels_armv6;
39
+        c->diff_pixels = ff_diff_pixels_armv6;
40
+    }
41
+}
... ...
@@ -33,19 +33,19 @@
33 33
 #include "avcodec.h"
34 34
 #include "blockdsp.h"
35 35
 #include "bswapdsp.h"
36
-#include "dsputil.h"
37 36
 #include "fdctdsp.h"
38 37
 #include "idctdsp.h"
39 38
 #include "get_bits.h"
39
+#include "pixblockdsp.h"
40 40
 #include "put_bits.h"
41 41
 
42 42
 typedef struct ASV1Context{
43 43
     AVCodecContext *avctx;
44 44
     BlockDSPContext bdsp;
45 45
     BswapDSPContext bbdsp;
46
-    DSPContext dsp;
47 46
     FDCTDSPContext fdsp;
48 47
     IDCTDSPContext idsp;
48
+    PixblockDSPContext pdsp;
49 49
     PutBitContext pb;
50 50
     GetBitContext gb;
51 51
     ScanTable scantable;
... ...
@@ -159,16 +159,16 @@ static inline void dct_get(ASV1Context *a, const AVFrame *frame,
159 159
     uint8_t *ptr_cb = frame->data[1] + (mb_y * 8 * frame->linesize[1]) + mb_x * 8;
160 160
     uint8_t *ptr_cr = frame->data[2] + (mb_y * 8 * frame->linesize[2]) + mb_x * 8;
161 161
 
162
-    a->dsp.get_pixels(block[0], ptr_y                 , linesize);
163
-    a->dsp.get_pixels(block[1], ptr_y              + 8, linesize);
164
-    a->dsp.get_pixels(block[2], ptr_y + 8*linesize    , linesize);
165
-    a->dsp.get_pixels(block[3], ptr_y + 8*linesize + 8, linesize);
162
+    a->pdsp.get_pixels(block[0], ptr_y,                    linesize);
163
+    a->pdsp.get_pixels(block[1], ptr_y + 8,                linesize);
164
+    a->pdsp.get_pixels(block[2], ptr_y + 8 * linesize,     linesize);
165
+    a->pdsp.get_pixels(block[3], ptr_y + 8 * linesize + 8, linesize);
166 166
     for(i=0; i<4; i++)
167 167
         a->fdsp.fdct(block[i]);
168 168
 
169 169
     if(!(a->avctx->flags&CODEC_FLAG_GRAY)){
170
-        a->dsp.get_pixels(block[4], ptr_cb, frame->linesize[1]);
171
-        a->dsp.get_pixels(block[5], ptr_cr, frame->linesize[2]);
170
+        a->pdsp.get_pixels(block[4], ptr_cb, frame->linesize[1]);
171
+        a->pdsp.get_pixels(block[5], ptr_cr, frame->linesize[2]);
172 172
         for(i=4; i<6; i++)
173 173
             a->fdsp.fdct(block[i]);
174 174
     }
... ...
@@ -248,8 +248,8 @@ static av_cold int encode_init(AVCodecContext *avctx){
248 248
     avctx->coded_frame->key_frame = 1;
249 249
 
250 250
     ff_asv_common_init(avctx);
251
-    ff_dsputil_init(&a->dsp, avctx);
252 251
     ff_fdctdsp_init(&a->fdsp, avctx);
252
+    ff_pixblockdsp_init(&a->pdsp, avctx);
253 253
 
254 254
     if(avctx->global_quality == 0) avctx->global_quality= 4*FF_QUALITY_SCALE;
255 255
 
... ...
@@ -30,10 +30,10 @@
30 30
 
31 31
 #include "avcodec.h"
32 32
 #include "blockdsp.h"
33
-#include "dsputil.h"
34 33
 #include "fdctdsp.h"
35 34
 #include "internal.h"
36 35
 #include "mpegvideo.h"
36
+#include "pixblockdsp.h"
37 37
 #include "dnxhdenc.h"
38 38
 
39 39
 // The largest value that will not lead to overflow for 10bit samples.
... ...
@@ -308,10 +308,10 @@ static av_cold int dnxhd_encode_init(AVCodecContext *avctx)
308 308
     avctx->bits_per_raw_sample = ctx->cid_table->bit_depth;
309 309
 
310 310
     ff_blockdsp_init(&ctx->bdsp, avctx);
311
-    ff_dsputil_init(&ctx->m.dsp, avctx);
312 311
     ff_fdctdsp_init(&ctx->m.fdsp, avctx);
313 312
     ff_idctdsp_init(&ctx->m.idsp, avctx);
314 313
     ff_mpegvideoencdsp_init(&ctx->m.mpvencdsp, avctx);
314
+    ff_pixblockdsp_init(&ctx->m.pdsp, avctx);
315 315
     ff_dct_common_init(&ctx->m);
316 316
     if (!ctx->m.dct_quantize)
317 317
         ctx->m.dct_quantize = ff_dct_quantize_c;
... ...
@@ -540,12 +540,12 @@ void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y)
540 540
                            ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs);
541 541
     const uint8_t *ptr_v = ctx->thread[0]->src[2] +
542 542
                            ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs);
543
-    DSPContext *dsp = &ctx->m.dsp;
543
+    PixblockDSPContext *pdsp = &ctx->m.pdsp;
544 544
 
545
-    dsp->get_pixels(ctx->blocks[0], ptr_y,      ctx->m.linesize);
546
-    dsp->get_pixels(ctx->blocks[1], ptr_y + bw, ctx->m.linesize);
547
-    dsp->get_pixels(ctx->blocks[2], ptr_u,      ctx->m.uvlinesize);
548
-    dsp->get_pixels(ctx->blocks[3], ptr_v,      ctx->m.uvlinesize);
545
+    pdsp->get_pixels(ctx->blocks[0], ptr_y,      ctx->m.linesize);
546
+    pdsp->get_pixels(ctx->blocks[1], ptr_y + bw, ctx->m.linesize);
547
+    pdsp->get_pixels(ctx->blocks[2], ptr_u,      ctx->m.uvlinesize);
548
+    pdsp->get_pixels(ctx->blocks[3], ptr_v,      ctx->m.uvlinesize);
549 549
 
550 550
     if (mb_y + 1 == ctx->m.mb_height && ctx->m.avctx->height == 1080) {
551 551
         if (ctx->interlaced) {
... ...
@@ -568,14 +568,14 @@ void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y)
568 568
             ctx->bdsp.clear_block(ctx->blocks[7]);
569 569
         }
570 570
     } else {
571
-        dsp->get_pixels(ctx->blocks[4],
572
-                        ptr_y + ctx->dct_y_offset, ctx->m.linesize);
573
-        dsp->get_pixels(ctx->blocks[5],
574
-                        ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize);
575
-        dsp->get_pixels(ctx->blocks[6],
576
-                        ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize);
577
-        dsp->get_pixels(ctx->blocks[7],
578
-                        ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize);
571
+        pdsp->get_pixels(ctx->blocks[4],
572
+                         ptr_y + ctx->dct_y_offset, ctx->m.linesize);
573
+        pdsp->get_pixels(ctx->blocks[5],
574
+                         ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize);
575
+        pdsp->get_pixels(ctx->blocks[6],
576
+                         ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize);
577
+        pdsp->get_pixels(ctx->blocks[7],
578
+                         ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize);
579 579
     }
580 580
 }
581 581
 
... ...
@@ -35,13 +35,6 @@
35 35
 
36 36
 uint32_t ff_square_tab[512] = { 0, };
37 37
 
38
-#define BIT_DEPTH 16
39
-#include "dsputilenc_template.c"
40
-#undef BIT_DEPTH
41
-
42
-#define BIT_DEPTH 8
43
-#include "dsputilenc_template.c"
44
-
45 38
 static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
46 39
                   int line_size, int h)
47 40
 {
... ...
@@ -110,27 +103,6 @@ static int sse16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
110 110
     return s;
111 111
 }
112 112
 
113
-static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1,
114
-                          const uint8_t *s2, int stride)
115
-{
116
-    int i;
117
-
118
-    /* read the pixels */
119
-    for (i = 0; i < 8; i++) {
120
-        block[0] = s1[0] - s2[0];
121
-        block[1] = s1[1] - s2[1];
122
-        block[2] = s1[2] - s2[2];
123
-        block[3] = s1[3] - s2[3];
124
-        block[4] = s1[4] - s2[4];
125
-        block[5] = s1[5] - s2[5];
126
-        block[6] = s1[6] - s2[6];
127
-        block[7] = s1[7] - s2[7];
128
-        s1      += stride;
129
-        s2      += stride;
130
-        block   += 8;
131
-    }
132
-}
133
-
134 113
 static int sum_abs_dctelem_c(int16_t *block)
135 114
 {
136 115
     int sum = 0, i;
... ...
@@ -577,7 +549,7 @@ static int dct_sad8x8_c(MpegEncContext *s, uint8_t *src1,
577 577
 
578 578
     assert(h == 8);
579 579
 
580
-    s->dsp.diff_pixels(temp, src1, src2, stride);
580
+    s->pdsp.diff_pixels(temp, src1, src2, stride);
581 581
     s->fdsp.fdct(temp);
582 582
     return s->dsp.sum_abs_dctelem(temp);
583 583
 }
... ...
@@ -617,7 +589,7 @@ static int dct264_sad8x8_c(MpegEncContext *s, uint8_t *src1,
617 617
     int16_t dct[8][8];
618 618
     int i, sum = 0;
619 619
 
620
-    s->dsp.diff_pixels(dct[0], src1, src2, stride);
620
+    s->pdsp.diff_pixels(dct[0], src1, src2, stride);
621 621
 
622 622
 #define SRC(x) dct[i][x]
623 623
 #define DST(x, v) dct[i][x] = v
... ...
@@ -644,7 +616,7 @@ static int dct_max8x8_c(MpegEncContext *s, uint8_t *src1,
644 644
 
645 645
     assert(h == 8);
646 646
 
647
-    s->dsp.diff_pixels(temp, src1, src2, stride);
647
+    s->pdsp.diff_pixels(temp, src1, src2, stride);
648 648
     s->fdsp.fdct(temp);
649 649
 
650 650
     for (i = 0; i < 64; i++)
... ...
@@ -663,7 +635,7 @@ static int quant_psnr8x8_c(MpegEncContext *s, uint8_t *src1,
663 663
     assert(h == 8);
664 664
     s->mb_intra = 0;
665 665
 
666
-    s->dsp.diff_pixels(temp, src1, src2, stride);
666
+    s->pdsp.diff_pixels(temp, src1, src2, stride);
667 667
 
668 668
     memcpy(bak, temp, 64 * sizeof(int16_t));
669 669
 
... ...
@@ -694,7 +666,7 @@ static int rd8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
694 694
     copy_block8(lsrc1, src1, 8, stride, 8);
695 695
     copy_block8(lsrc2, src2, 8, stride, 8);
696 696
 
697
-    s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
697
+    s->pdsp.diff_pixels(temp, lsrc1, lsrc2, 8);
698 698
 
699 699
     s->block_last_index[0 /* FIXME */] =
700 700
     last                               =
... ...
@@ -766,7 +738,7 @@ static int bit8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
766 766
 
767 767
     assert(h == 8);
768 768
 
769
-    s->dsp.diff_pixels(temp, src1, src2, stride);
769
+    s->pdsp.diff_pixels(temp, src1, src2, stride);
770 770
 
771 771
     s->block_last_index[0 /* FIXME */] =
772 772
     last                               =
... ...
@@ -932,8 +904,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
932 932
 {
933 933
     const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;
934 934
 
935
-    c->diff_pixels = diff_pixels_c;
936
-
937 935
     c->sum_abs_dctelem = sum_abs_dctelem_c;
938 936
 
939 937
     /* TODO [0] 16  [1] 8 */
... ...
@@ -975,16 +945,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
975 975
     c->nsse[0] = nsse16_c;
976 976
     c->nsse[1] = nsse8_c;
977 977
 
978
-    switch (avctx->bits_per_raw_sample) {
979
-    case 9:
980
-    case 10:
981
-        c->get_pixels = get_pixels_16_c;
982
-        break;
983
-    default:
984
-        c->get_pixels = get_pixels_8_c;
985
-        break;
986
-    }
987
-
988 978
     if (ARCH_ARM)
989 979
         ff_dsputil_init_arm(c, avctx, high_bit_depth);
990 980
     if (ARCH_PPC)
... ...
@@ -48,14 +48,6 @@ typedef int (*me_cmp_func)(struct MpegEncContext *c,
48 48
  * DSPContext.
49 49
  */
50 50
 typedef struct DSPContext {
51
-    /* pixel ops : interface with DCT */
52
-    void (*get_pixels)(int16_t *block /* align 16 */,
53
-                       const uint8_t *pixels /* align 8 */,
54
-                       int line_size);
55
-    void (*diff_pixels)(int16_t *block /* align 16 */,
56
-                        const uint8_t *s1 /* align 8 */,
57
-                        const uint8_t *s2 /* align 8 */,
58
-                        int stride);
59 51
     int (*sum_abs_dctelem)(int16_t *block /* align 16 */);
60 52
 
61 53
     me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */
62 54
deleted file mode 100644
... ...
@@ -1,51 +0,0 @@
1
-/*
2
- * DSP utils
3
- * Copyright (c) 2000, 2001 Fabrice Bellard
4
- * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5
- *
6
- * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
7
- *
8
- * This file is part of Libav.
9
- *
10
- * Libav is free software; you can redistribute it and/or
11
- * modify it under the terms of the GNU Lesser General Public
12
- * License as published by the Free Software Foundation; either
13
- * version 2.1 of the License, or (at your option) any later version.
14
- *
15
- * Libav is distributed in the hope that it will be useful,
16
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18
- * Lesser General Public License for more details.
19
- *
20
- * You should have received a copy of the GNU Lesser General Public
21
- * License along with Libav; if not, write to the Free Software
22
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23
- */
24
-
25
-/**
26
- * @file
27
- * DSP utils
28
- */
29
-
30
-#include "bit_depth_template.c"
31
-
32
-static void FUNCC(get_pixels)(int16_t *restrict block, const uint8_t *_pixels,
33
-                              int line_size)
34
-{
35
-    const pixel *pixels = (const pixel *) _pixels;
36
-    int i;
37
-
38
-    /* read the pixels */
39
-    for (i = 0; i < 8; i++) {
40
-        block[0] = pixels[0];
41
-        block[1] = pixels[1];
42
-        block[2] = pixels[2];
43
-        block[3] = pixels[3];
44
-        block[4] = pixels[4];
45
-        block[5] = pixels[5];
46
-        block[6] = pixels[6];
47
-        block[7] = pixels[7];
48
-        pixels  += line_size / sizeof(pixel);
49
-        block   += 8;
50
-    }
51
-}
... ...
@@ -31,6 +31,7 @@
31 31
 #include "dsputil.h"
32 32
 #include "fdctdsp.h"
33 33
 #include "internal.h"
34
+#include "pixblockdsp.h"
34 35
 #include "put_bits.h"
35 36
 #include "dv.h"
36 37
 #include "dv_tablegen.h"
... ...
@@ -41,6 +42,7 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx)
41 41
     DVVideoContext *s = avctx->priv_data;
42 42
     DSPContext dsp;
43 43
     FDCTDSPContext fdsp;
44
+    PixblockDSPContext pdsp;
44 45
     int ret;
45 46
 
46 47
     s->sys = avpriv_dv_codec_profile(avctx);
... ...
@@ -65,9 +67,10 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx)
65 65
 
66 66
     ff_dsputil_init(&dsp, avctx);
67 67
     ff_fdctdsp_init(&fdsp, avctx);
68
+    ff_pixblockdsp_init(&pdsp, avctx);
68 69
     ff_set_cmp(&dsp, dsp.ildct_cmp, avctx->ildct_cmp);
69 70
 
70
-    s->get_pixels = dsp.get_pixels;
71
+    s->get_pixels = pdsp.get_pixels;
71 72
     s->ildct_cmp  = dsp.ildct_cmp[5];
72 73
 
73 74
     s->fdct[0]    = fdsp.fdct;
... ...
@@ -39,6 +39,7 @@
39 39
 #include "idctdsp.h"
40 40
 #include "mpegvideodsp.h"
41 41
 #include "mpegvideoencdsp.h"
42
+#include "pixblockdsp.h"
42 43
 #include "put_bits.h"
43 44
 #include "ratecontrol.h"
44 45
 #include "parser.h"
... ...
@@ -361,6 +362,7 @@ typedef struct MpegEncContext {
361 361
     IDCTDSPContext idsp;
362 362
     MpegVideoDSPContext mdsp;
363 363
     MpegvideoEncDSPContext mpvencdsp;
364
+    PixblockDSPContext pdsp;
364 365
     QpelDSPContext qdsp;
365 366
     VideoDSPContext vdsp;
366 367
     H263DSPContext h263dsp;
... ...
@@ -37,7 +37,6 @@
37 37
 #include "libavutil/timer.h"
38 38
 #include "avcodec.h"
39 39
 #include "dct.h"
40
-#include "dsputil.h"
41 40
 #include "idctdsp.h"
42 41
 #include "mpeg12.h"
43 42
 #include "mpegvideo.h"
... ...
@@ -48,6 +47,7 @@
48 48
 #include "mpegutils.h"
49 49
 #include "mjpegenc.h"
50 50
 #include "msmpeg4.h"
51
+#include "pixblockdsp.h"
51 52
 #include "qpeldsp.h"
52 53
 #include "faandct.h"
53 54
 #include "thread.h"
... ...
@@ -703,6 +703,7 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
703 703
 
704 704
     ff_fdctdsp_init(&s->fdsp, avctx);
705 705
     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
706
+    ff_pixblockdsp_init(&s->pdsp, avctx);
706 707
     ff_qpeldsp_init(&s->qdsp);
707 708
 
708 709
     s->avctx->coded_frame = s->current_picture.f;
... ...
@@ -1943,22 +1944,22 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
1943 1943
             }
1944 1944
         }
1945 1945
 
1946
-        s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1947
-        s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1948
-        s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1949
-        s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1946
+        s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
1947
+        s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
1948
+        s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
1949
+        s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
1950 1950
 
1951 1951
         if (s->flags & CODEC_FLAG_GRAY) {
1952 1952
             skip_dct[4] = 1;
1953 1953
             skip_dct[5] = 1;
1954 1954
         } else {
1955
-            s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1956
-            s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1955
+            s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1956
+            s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1957 1957
             if (!s->chroma_y_shift) { /* 422 */
1958
-                s->dsp.get_pixels(s->block[6],
1959
-                                  ptr_cb + (dct_offset >> 1), wrap_c);
1960
-                s->dsp.get_pixels(s->block[7],
1961
-                                  ptr_cr + (dct_offset >> 1), wrap_c);
1958
+                s->pdsp.get_pixels(s->block[6],
1959
+                                   ptr_cb + (dct_offset >> 1), wrap_c);
1960
+                s->pdsp.get_pixels(s->block[7],
1961
+                                   ptr_cr + (dct_offset >> 1), wrap_c);
1962 1962
             }
1963 1963
         }
1964 1964
     } else {
... ...
@@ -2024,24 +2025,24 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
2024 2024
             }
2025 2025
         }
2026 2026
 
2027
-        s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2028
-        s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2029
-        s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2030
-                           dest_y + dct_offset, wrap_y);
2031
-        s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2032
-                           dest_y + dct_offset + 8, wrap_y);
2027
+        s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2028
+        s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2029
+        s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2030
+                            dest_y + dct_offset, wrap_y);
2031
+        s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2032
+                            dest_y + dct_offset + 8, wrap_y);
2033 2033
 
2034 2034
         if (s->flags & CODEC_FLAG_GRAY) {
2035 2035
             skip_dct[4] = 1;
2036 2036
             skip_dct[5] = 1;
2037 2037
         } else {
2038
-            s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2039
-            s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2038
+            s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2039
+            s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2040 2040
             if (!s->chroma_y_shift) { /* 422 */
2041
-                s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2042
-                                   dest_cb + (dct_offset >> 1), wrap_c);
2043
-                s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2044
-                                   dest_cr + (dct_offset >> 1), wrap_c);
2041
+                s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2042
+                                    dest_cb + (dct_offset >> 1), wrap_c);
2043
+                s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2044
+                                    dest_cr + (dct_offset >> 1), wrap_c);
2045 2045
             }
2046 2046
         }
2047 2047
         /* pre quantization */
2048 2048
new file mode 100644
... ...
@@ -0,0 +1,76 @@
0
+/*
1
+ * This file is part of Libav.
2
+ *
3
+ * Libav is free software; you can redistribute it and/or
4
+ * modify it under the terms of the GNU Lesser General Public
5
+ * License as published by the Free Software Foundation; either
6
+ * version 2.1 of the License, or (at your option) any later version.
7
+ *
8
+ * Libav is distributed in the hope that it will be useful,
9
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
+ * Lesser General Public License for more details.
12
+ *
13
+ * You should have received a copy of the GNU Lesser General Public
14
+ * License along with Libav; if not, write to the Free Software
15
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+ */
17
+
18
+#include <stdint.h>
19
+
20
+#include "config.h"
21
+#include "libavutil/attributes.h"
22
+#include "avcodec.h"
23
+#include "pixblockdsp.h"
24
+
25
+#define BIT_DEPTH 16
26
+#include "pixblockdsp_template.c"
27
+#undef BIT_DEPTH
28
+
29
+#define BIT_DEPTH 8
30
+#include "pixblockdsp_template.c"
31
+
32
/**
 * Store the element-wise difference of two 8x8 blocks of 8-bit samples.
 *
 * @param block  destination; 64 values are written, one row of 8 after another
 * @param s1     top-left sample of the minuend block
 * @param s2     top-left sample of the subtrahend block
 * @param stride offset between consecutive rows, applied to both s1 and s2
 */
static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1,
                          const uint8_t *s2, int stride)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        /* One output row: block[col] = s1[col] - s2[col]. */
        for (col = 0; col < 8; col++)
            block[col] = s1[col] - s2[col];

        s1    += stride;
        s2    += stride;
        block += 8;
    }
}
52
+
53
+av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx)
54
+{
55
+    const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;
56
+
57
+    c->diff_pixels = diff_pixels_c;
58
+
59
+    switch (avctx->bits_per_raw_sample) {
60
+    case 9:
61
+    case 10:
62
+        c->get_pixels = get_pixels_16_c;
63
+        break;
64
+    default:
65
+        c->get_pixels = get_pixels_8_c;
66
+        break;
67
+    }
68
+
69
+    if (ARCH_ARM)
70
+        ff_pixblockdsp_init_arm(c, avctx, high_bit_depth);
71
+    if (ARCH_PPC)
72
+        ff_pixblockdsp_init_ppc(c, avctx, high_bit_depth);
73
+    if (ARCH_X86)
74
+        ff_pixblockdsp_init_x86(c, avctx, high_bit_depth);
75
+}
0 76
new file mode 100644
... ...
@@ -0,0 +1,44 @@
0
+/*
1
+ * This file is part of Libav.
2
+ *
3
+ * Libav is free software; you can redistribute it and/or
4
+ * modify it under the terms of the GNU Lesser General Public
5
+ * License as published by the Free Software Foundation; either
6
+ * version 2.1 of the License, or (at your option) any later version.
7
+ *
8
+ * Libav is distributed in the hope that it will be useful,
9
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
+ * Lesser General Public License for more details.
12
+ *
13
+ * You should have received a copy of the GNU Lesser General Public
14
+ * License along with Libav; if not, write to the Free Software
15
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+ */
17
+
18
+#ifndef AVCODEC_PIXBLOCKDSP_H
19
+#define AVCODEC_PIXBLOCKDSP_H
20
+
21
+#include <stdint.h>
22
+
23
+#include "avcodec.h"
24
+
25
/**
 * Function table for the 8x8 pixel block routines.
 */
typedef struct PixblockDSPContext {
    /** Read a block of pixels into a block of 16-bit values. */
    void (*get_pixels)(
        int16_t *block,        /* align 16 */
        const uint8_t *pixels, /* align 8 */
        int line_size);

    /** Write the per-sample difference s1 - s2 into a block of 16-bit values. */
    void (*diff_pixels)(
        int16_t *block,    /* align 16 */
        const uint8_t *s1, /* align 8 */
        const uint8_t *s2, /* align 8 */
        int stride);
} PixblockDSPContext;
34
+
35
+void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx);
36
+void ff_pixblockdsp_init_arm(PixblockDSPContext *c, AVCodecContext *avctx,
37
+                             unsigned high_bit_depth);
38
+void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, AVCodecContext *avctx,
39
+                             unsigned high_bit_depth);
40
+void ff_pixblockdsp_init_x86(PixblockDSPContext *c, AVCodecContext *avctx,
41
+                             unsigned high_bit_depth);
42
+
43
+#endif /* AVCODEC_PIXBLOCKDSP_H */
0 44
new file mode 100644
... ...
@@ -0,0 +1,40 @@
0
+/*
1
+ * This file is part of Libav.
2
+ *
3
+ * Libav is free software; you can redistribute it and/or
4
+ * modify it under the terms of the GNU Lesser General Public
5
+ * License as published by the Free Software Foundation; either
6
+ * version 2.1 of the License, or (at your option) any later version.
7
+ *
8
+ * Libav is distributed in the hope that it will be useful,
9
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
+ * Lesser General Public License for more details.
12
+ *
13
+ * You should have received a copy of the GNU Lesser General Public
14
+ * License along with Libav; if not, write to the Free Software
15
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+ */
17
+
18
+#include "bit_depth_template.c"
19
+
20
+static void FUNCC(get_pixels)(int16_t *restrict block, const uint8_t *_pixels,
21
+                              int line_size)
22
+{
23
+    const pixel *pixels = (const pixel *) _pixels;
24
+    int i;
25
+
26
+    /* read the pixels */
27
+    for (i = 0; i < 8; i++) {
28
+        block[0] = pixels[0];
29
+        block[1] = pixels[1];
30
+        block[2] = pixels[2];
31
+        block[3] = pixels[3];
32
+        block[4] = pixels[4];
33
+        block[5] = pixels[5];
34
+        block[6] = pixels[6];
35
+        block[7] = pixels[7];
36
+        pixels  += line_size / sizeof(pixel);
37
+        block   += 8;
38
+    }
39
+}
... ...
@@ -15,6 +15,7 @@ OBJS-$(CONFIG_MPEGAUDIODSP)            += ppc/mpegaudiodsp_altivec.o
15 15
 OBJS-$(CONFIG_MPEGVIDEO)               += ppc/mpegvideo_altivec.o      \
16 16
                                           ppc/mpegvideodsp.o
17 17
 OBJS-$(CONFIG_MPEGVIDEOENC)            += ppc/mpegvideoencdsp.o
18
+OBJS-$(CONFIG_PIXBLOCKDSP)             += ppc/pixblockdsp.o
18 19
 OBJS-$(CONFIG_VIDEODSP)                += ppc/videodsp_ppc.o
19 20
 OBJS-$(CONFIG_VP3DSP)                  += ppc/vp3dsp_altivec.o
20 21
 
... ...
@@ -402,105 +402,6 @@ static int sse16_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
402 402
     return s;
403 403
 }
404 404
 
405
-static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
406
-                               int line_size)
407
-{
408
-    int i;
409
-    vector unsigned char perm = vec_lvsl(0, pixels);
410
-    const vector unsigned char zero =
411
-        (const vector unsigned char) vec_splat_u8(0);
412
-
413
-    for (i = 0; i < 8; i++) {
414
-        /* Read potentially unaligned pixels.
415
-         * We're reading 16 pixels, and actually only want 8,
416
-         * but we simply ignore the extras. */
417
-        vector unsigned char pixl = vec_ld(0, pixels);
418
-        vector unsigned char pixr = vec_ld(7, pixels);
419
-        vector unsigned char bytes = vec_perm(pixl, pixr, perm);
420
-
421
-        // Convert the bytes into shorts.
422
-        vector signed short shorts = (vector signed short) vec_mergeh(zero,
423
-                                                                      bytes);
424
-
425
-        // Save the data to the block, we assume the block is 16-byte aligned.
426
-        vec_st(shorts, i * 16, (vector signed short *) block);
427
-
428
-        pixels += line_size;
429
-    }
430
-}
431
-
432
-static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
433
-                                const uint8_t *s2, int stride)
434
-{
435
-    int i;
436
-    vector unsigned char perm1 = vec_lvsl(0, s1);
437
-    vector unsigned char perm2 = vec_lvsl(0, s2);
438
-    const vector unsigned char zero =
439
-        (const vector unsigned char) vec_splat_u8(0);
440
-    vector signed short shorts1, shorts2;
441
-
442
-    for (i = 0; i < 4; i++) {
443
-        /* Read potentially unaligned pixels.
444
-         * We're reading 16 pixels, and actually only want 8,
445
-         * but we simply ignore the extras. */
446
-        vector unsigned char pixl  = vec_ld(0,  s1);
447
-        vector unsigned char pixr  = vec_ld(15, s1);
448
-        vector unsigned char bytes = vec_perm(pixl, pixr, perm1);
449
-
450
-        // Convert the bytes into shorts.
451
-        shorts1 = (vector signed short) vec_mergeh(zero, bytes);
452
-
453
-        // Do the same for the second block of pixels.
454
-        pixl  = vec_ld(0,  s2);
455
-        pixr  = vec_ld(15, s2);
456
-        bytes = vec_perm(pixl, pixr, perm2);
457
-
458
-        // Convert the bytes into shorts.
459
-        shorts2 = (vector signed short) vec_mergeh(zero, bytes);
460
-
461
-        // Do the subtraction.
462
-        shorts1 = vec_sub(shorts1, shorts2);
463
-
464
-        // Save the data to the block, we assume the block is 16-byte aligned.
465
-        vec_st(shorts1, 0, (vector signed short *) block);
466
-
467
-        s1    += stride;
468
-        s2    += stride;
469
-        block += 8;
470
-
471
-        /* The code below is a copy of the code above...
472
-         * This is a manual unroll. */
473
-
474
-        /* Read potentially unaligned pixels.
475
-         * We're reading 16 pixels, and actually only want 8,
476
-         * but we simply ignore the extras. */
477
-        pixl  = vec_ld(0,  s1);
478
-        pixr  = vec_ld(15, s1);
479
-        bytes = vec_perm(pixl, pixr, perm1);
480
-
481
-        // Convert the bytes into shorts.
482
-        shorts1 = (vector signed short) vec_mergeh(zero, bytes);
483
-
484
-        // Do the same for the second block of pixels.
485
-        pixl  = vec_ld(0,  s2);
486
-        pixr  = vec_ld(15, s2);
487
-        bytes = vec_perm(pixl, pixr, perm2);
488
-
489
-        // Convert the bytes into shorts.
490
-        shorts2 = (vector signed short) vec_mergeh(zero, bytes);
491
-
492
-        // Do the subtraction.
493
-        shorts1 = vec_sub(shorts1, shorts2);
494
-
495
-        // Save the data to the block, we assume the block is 16-byte aligned.
496
-        vec_st(shorts1, 0, (vector signed short *) block);
497
-
498
-        s1    += stride;
499
-        s2    += stride;
500
-        block += 8;
501
-    }
502
-}
503
-
504 405
 static int hadamard8_diff8x8_altivec(MpegEncContext *s, uint8_t *dst,
505 406
                                      uint8_t *src, int stride, int h)
506 407
 {
... ...
@@ -854,12 +755,6 @@ av_cold void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx,
854 854
     c->sse[0] = sse16_altivec;
855 855
     c->sse[1] = sse8_altivec;
856 856
 
857
-    c->diff_pixels = diff_pixels_altivec;
858
-
859
-    if (!high_bit_depth) {
860
-        c->get_pixels = get_pixels_altivec;
861
-    }
862
-
863 857
     c->hadamard8_diff[0] = hadamard8_diff16_altivec;
864 858
     c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;
865 859
 }
866 860
new file mode 100644
... ...
@@ -0,0 +1,153 @@
0
+/*
1
+ * Copyright (c) 2002 Brian Foley
2
+ * Copyright (c) 2002 Dieter Shirley
3
+ * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
4
+ *
5
+ * This file is part of Libav.
6
+ *
7
+ * Libav is free software; you can redistribute it and/or
8
+ * modify it under the terms of the GNU Lesser General Public
9
+ * License as published by the Free Software Foundation; either
10
+ * version 2.1 of the License, or (at your option) any later version.
11
+ *
12
+ * Libav is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
+ * Lesser General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU Lesser General Public
18
+ * License along with Libav; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ */
21
+
22
+#include "config.h"
23
+#if HAVE_ALTIVEC_H
24
+#include <altivec.h>
25
+#endif
26
+
27
+#include "libavutil/attributes.h"
28
+#include "libavutil/cpu.h"
29
+#include "libavutil/ppc/cpu.h"
30
+#include "libavutil/ppc/types_altivec.h"
31
+#include "libavutil/ppc/util_altivec.h"
32
+#include "libavcodec/avcodec.h"
33
+#include "libavcodec/pixblockdsp.h"
34
+
35
+#if HAVE_ALTIVEC
36
+
37
+static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
38
+                               int line_size)
39
+{
40
+    int i;
41
+    vector unsigned char perm = vec_lvsl(0, pixels);
42
+    const vector unsigned char zero =
43
+        (const vector unsigned char) vec_splat_u8(0);
44
+
45
+    for (i = 0; i < 8; i++) {
46
+        /* Read potentially unaligned pixels.
47
+         * We're reading 16 pixels, and actually only want 8,
48
+         * but we simply ignore the extras. */
49
+        vector unsigned char pixl = vec_ld(0, pixels);
50
+        vector unsigned char pixr = vec_ld(7, pixels);
51
+        vector unsigned char bytes = vec_perm(pixl, pixr, perm);
52
+
53
+        // Convert the bytes into shorts.
54
+        vector signed short shorts = (vector signed short) vec_mergeh(zero,
55
+                                                                      bytes);
56
+
57
+        // Save the data to the block, we assume the block is 16-byte aligned.
58
+        vec_st(shorts, i * 16, (vector signed short *) block);
59
+
60
+        pixels += line_size;
61
+    }
62
+}
63
+
64
+static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
65
+                                const uint8_t *s2, int stride)
66
+{
67
+    int i;
68
+    vector unsigned char perm1 = vec_lvsl(0, s1);
69
+    vector unsigned char perm2 = vec_lvsl(0, s2);
70
+    const vector unsigned char zero =
71
+        (const vector unsigned char) vec_splat_u8(0);
72
+    vector signed short shorts1, shorts2;
73
+
74
+    for (i = 0; i < 4; i++) {
75
+        /* Read potentially unaligned pixels.
76
+         * We're reading 16 pixels, and actually only want 8,
77
+         * but we simply ignore the extras. */
78
+        vector unsigned char pixl  = vec_ld(0,  s1);
79
+        vector unsigned char pixr  = vec_ld(15, s1);
80
+        vector unsigned char bytes = vec_perm(pixl, pixr, perm1);
81
+
82
+        // Convert the bytes into shorts.
83
+        shorts1 = (vector signed short) vec_mergeh(zero, bytes);
84
+
85
+        // Do the same for the second block of pixels.
86
+        pixl  = vec_ld(0,  s2);
87
+        pixr  = vec_ld(15, s2);
88
+        bytes = vec_perm(pixl, pixr, perm2);
89
+
90
+        // Convert the bytes into shorts.
91
+        shorts2 = (vector signed short) vec_mergeh(zero, bytes);
92
+
93
+        // Do the subtraction.
94
+        shorts1 = vec_sub(shorts1, shorts2);
95
+
96
+        // Save the data to the block, we assume the block is 16-byte aligned.
97
+        vec_st(shorts1, 0, (vector signed short *) block);
98
+
99
+        s1    += stride;
100
+        s2    += stride;
101
+        block += 8;
102
+
103
+        /* The code below is a copy of the code above...
104
+         * This is a manual unroll. */
105
+
106
+        /* Read potentially unaligned pixels.
107
+         * We're reading 16 pixels, and actually only want 8,
108
+         * but we simply ignore the extras. */
109
+        pixl  = vec_ld(0,  s1);
110
+        pixr  = vec_ld(15, s1);
111
+        bytes = vec_perm(pixl, pixr, perm1);
112
+
113
+        // Convert the bytes into shorts.
114
+        shorts1 = (vector signed short) vec_mergeh(zero, bytes);
115
+
116
+        // Do the same for the second block of pixels.
117
+        pixl  = vec_ld(0,  s2);
118
+        pixr  = vec_ld(15, s2);
119
+        bytes = vec_perm(pixl, pixr, perm2);
120
+
121
+        // Convert the bytes into shorts.
122
+        shorts2 = (vector signed short) vec_mergeh(zero, bytes);
123
+
124
+        // Do the subtraction.
125
+        shorts1 = vec_sub(shorts1, shorts2);
126
+
127
+        // Save the data to the block, we assume the block is 16-byte aligned.
128
+        vec_st(shorts1, 0, (vector signed short *) block);
129
+
130
+        s1    += stride;
131
+        s2    += stride;
132
+        block += 8;
133
+    }
134
+}
135
+
136
+#endif /* HAVE_ALTIVEC */
137
+
138
+av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c,
139
+                                     AVCodecContext *avctx,
140
+                                     unsigned high_bit_depth)
141
+{
142
+#if HAVE_ALTIVEC
143
+    if (!PPC_ALTIVEC(av_get_cpu_flags()))
144
+        return;
145
+
146
+    c->diff_pixels = diff_pixels_altivec;
147
+
148
+    if (!high_bit_depth) {
149
+        c->get_pixels = get_pixels_altivec;
150
+    }
151
+#endif /* HAVE_ALTIVEC */
152
+}
... ...
@@ -26,6 +26,7 @@ OBJS-$(CONFIG_MPEGVIDEO)               += x86/mpegvideo.o              \
26 26
                                           x86/mpegvideodsp.o
27 27
 OBJS-$(CONFIG_MPEGVIDEOENC)            += x86/mpegvideoenc.o           \
28 28
                                           x86/mpegvideoencdsp_init.o
29
+OBJS-$(CONFIG_PIXBLOCKDSP)             += x86/pixblockdsp_init.o
29 30
 OBJS-$(CONFIG_QPELDSP)                 += x86/qpeldsp_init.o
30 31
 OBJS-$(CONFIG_VIDEODSP)                += x86/videodsp_init.o
31 32
 OBJS-$(CONFIG_VP3DSP)                  += x86/vp3dsp_init.o
... ...
@@ -93,6 +94,7 @@ YASM-OBJS-$(CONFIG_HPELDSP)            += x86/fpel.o                    \
93 93
 YASM-OBJS-$(CONFIG_HUFFYUVDSP)         += x86/huffyuvdsp.o
94 94
 YASM-OBJS-$(CONFIG_MPEGAUDIODSP)       += x86/imdct36.o
95 95
 YASM-OBJS-$(CONFIG_MPEGVIDEOENC)       += x86/mpegvideoencdsp.o
96
+YASM-OBJS-$(CONFIG_PIXBLOCKDSP)        += x86/pixblockdsp.o
96 97
 YASM-OBJS-$(CONFIG_QPELDSP)            += x86/qpeldsp.o                 \
97 98
                                           x86/fpel.o                    \
98 99
                                           x86/qpel.o
... ...
@@ -334,87 +334,3 @@ cglobal sse16, 5, 5, 8
334 334
     paddd     m7, m1
335 335
     movd     eax, m7         ; return value
336 336
     RET
337
-
338
-INIT_MMX mmx
339
-; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size)
340
-cglobal get_pixels, 3,4
341
-    movsxdifnidn r2, r2d
342
-    add          r0, 128
343
-    mov          r3, -128
344
-    pxor         m7, m7
345
-.loop:
346
-    mova         m0, [r1]
347
-    mova         m2, [r1+r2]
348
-    mova         m1, m0
349
-    mova         m3, m2
350
-    punpcklbw    m0, m7
351
-    punpckhbw    m1, m7
352
-    punpcklbw    m2, m7
353
-    punpckhbw    m3, m7
354
-    mova [r0+r3+ 0], m0
355
-    mova [r0+r3+ 8], m1
356
-    mova [r0+r3+16], m2
357
-    mova [r0+r3+24], m3
358
-    lea          r1, [r1+r2*2]
359
-    add          r3, 32
360
-    js .loop
361
-    REP_RET
362
-
363
-INIT_XMM sse2
364
-cglobal get_pixels, 3, 4
365
-    movsxdifnidn r2, r2d
366
-    lea          r3, [r2*3]
367
-    pxor         m4, m4
368
-    movh         m0, [r1]
369
-    movh         m1, [r1+r2]
370
-    movh         m2, [r1+r2*2]
371
-    movh         m3, [r1+r3]
372
-    lea          r1, [r1+r2*4]
373
-    punpcklbw    m0, m4
374
-    punpcklbw    m1, m4
375
-    punpcklbw    m2, m4
376
-    punpcklbw    m3, m4
377
-    mova       [r0], m0
378
-    mova  [r0+0x10], m1
379
-    mova  [r0+0x20], m2
380
-    mova  [r0+0x30], m3
381
-    movh         m0, [r1]
382
-    movh         m1, [r1+r2*1]
383
-    movh         m2, [r1+r2*2]
384
-    movh         m3, [r1+r3]
385
-    punpcklbw    m0, m4
386
-    punpcklbw    m1, m4
387
-    punpcklbw    m2, m4
388
-    punpcklbw    m3, m4
389
-    mova  [r0+0x40], m0
390
-    mova  [r0+0x50], m1
391
-    mova  [r0+0x60], m2
392
-    mova  [r0+0x70], m3
393
-    RET
394
-
395
-INIT_MMX mmx
396
-; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
397
-;                         int stride);
398
-cglobal diff_pixels, 4,5
399
-    movsxdifnidn r3, r3d
400
-    pxor         m7, m7
401
-    add          r0,  128
402
-    mov          r4, -128
403
-.loop:
404
-    mova         m0, [r1]
405
-    mova         m2, [r2]
406
-    mova         m1, m0
407
-    mova         m3, m2
408
-    punpcklbw    m0, m7
409
-    punpckhbw    m1, m7
410
-    punpcklbw    m2, m7
411
-    punpckhbw    m3, m7
412
-    psubw        m0, m2
413
-    psubw        m1, m3
414
-    mova  [r0+r4+0], m0
415
-    mova  [r0+r4+8], m1
416
-    add          r1, r3
417
-    add          r2, r3
418
-    add          r4, 16
419
-    jne .loop
420
-    REP_RET
... ...
@@ -30,11 +30,6 @@
30 30
 #include "libavcodec/mpegvideo.h"
31 31
 #include "dsputil_x86.h"
32 32
 
33
-void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size);
34
-void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size);
35
-void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
36
-                        int stride);
37
-
38 33
 #if HAVE_INLINE_ASM
39 34
 
40 35
 static int sse8_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
... ...
@@ -823,16 +818,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
823 823
 {
824 824
     int cpu_flags = av_get_cpu_flags();
825 825
 
826
-    if (EXTERNAL_MMX(cpu_flags)) {
827
-        if (!high_bit_depth)
828
-            c->get_pixels = ff_get_pixels_mmx;
829
-        c->diff_pixels = ff_diff_pixels_mmx;
830
-    }
831
-
832
-    if (EXTERNAL_SSE2(cpu_flags))
833
-        if (!high_bit_depth)
834
-            c->get_pixels = ff_get_pixels_sse2;
835
-
836 826
 #if HAVE_INLINE_ASM
837 827
     if (INLINE_MMX(cpu_flags)) {
838 828
         c->sum_abs_dctelem = sum_abs_dctelem_mmx;
839 829
new file mode 100644
... ...
@@ -0,0 +1,110 @@
0
+;*****************************************************************************
1
+;* SIMD-optimized pixel operations
2
+;*****************************************************************************
3
+;* Copyright (c) 2000, 2001 Fabrice Bellard
4
+;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5
+;*
6
+;* This file is part of Libav.
7
+;*
8
+;* Libav is free software; you can redistribute it and/or
9
+;* modify it under the terms of the GNU Lesser General Public
10
+;* License as published by the Free Software Foundation; either
11
+;* version 2.1 of the License, or (at your option) any later version.
12
+;*
13
+;* Libav is distributed in the hope that it will be useful,
14
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
+;* Lesser General Public License for more details.
17
+;*
18
+;* You should have received a copy of the GNU Lesser General Public
19
+;* License along with Libav; if not, write to the Free Software
20
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
+;*****************************************************************************
22
+
23
+%include "libavutil/x86/x86util.asm"
24
+
25
+SECTION .text
26
+
27
+INIT_MMX mmx
28
+; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size)
29
+cglobal get_pixels, 3,4             ; r0 = block, r1 = pixels, r2 = line_size, r3 = scratch
30
+    movsxdifnidn r2, r2d            ; x86inc helper; presumably widens the int line_size to pointer width
31
+    add          r0, 128            ; r0 -> end of the 128-byte output block
32
+    mov          r3, -128           ; negative byte offset from the end, counts up to 0
33
+    pxor         m7, m7             ; m7 = 0, interleaved with the bytes to zero-extend them
34
+.loop:
35
+    mova         m0, [r1]           ; source row n
36
+    mova         m2, [r1+r2]        ; source row n+1
37
+    mova         m1, m0             ; copy of row n for its upper bytes
38
+    mova         m3, m2             ; copy of row n+1 for its upper bytes
39
+    punpcklbw    m0, m7             ; row n, lower bytes -> words
40
+    punpckhbw    m1, m7             ; row n, upper bytes -> words
41
+    punpcklbw    m2, m7             ; row n+1, lower bytes -> words
42
+    punpckhbw    m3, m7             ; row n+1, upper bytes -> words
43
+    mova [r0+r3+ 0], m0             ; output row n, first half
44
+    mova [r0+r3+ 8], m1             ; output row n, second half
45
+    mova [r0+r3+16], m2             ; output row n+1, first half
46
+    mova [r0+r3+24], m3             ; output row n+1, second half
47
+    lea          r1, [r1+r2*2]      ; advance the source by two rows
48
+    add          r3, 32             ; 32 output bytes were written this iteration
49
+    js .loop                        ; repeat while the offset is still negative (-128..-32: 4 passes)
50
+    REP_RET
51
+
52
+INIT_XMM sse2                       ; C prototype: void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size)
52
+cglobal get_pixels, 3, 4            ; r0 = block, r1 = pixels, r2 = line_size, r3 = scratch
53
+    movsxdifnidn r2, r2d            ; x86inc helper; presumably widens the int line_size to pointer width
54
+    lea          r3, [r2*3]         ; r3 = 3 * line_size, offset of the fourth row
55
+    pxor         m4, m4             ; m4 = 0, interleaved with the bytes to zero-extend them
56
+    movh         m0, [r1]           ; source rows 0..3, one row per register
57
+    movh         m1, [r1+r2]
58
+    movh         m2, [r1+r2*2]
59
+    movh         m3, [r1+r3]
60
+    lea          r1, [r1+r2*4]      ; advance the source by four rows
61
+    punpcklbw    m0, m4             ; bytes -> words for rows 0..3
62
+    punpcklbw    m1, m4
63
+    punpcklbw    m2, m4
64
+    punpcklbw    m3, m4
65
+    mova       [r0], m0             ; output rows 0..3, 16 bytes apart
66
+    mova  [r0+0x10], m1
67
+    mova  [r0+0x20], m2
68
+    mova  [r0+0x30], m3
69
+    movh         m0, [r1]           ; source rows 4..7, same pattern as above
70
+    movh         m1, [r1+r2*1]
71
+    movh         m2, [r1+r2*2]
72
+    movh         m3, [r1+r3]
73
+    punpcklbw    m0, m4             ; bytes -> words for rows 4..7
74
+    punpcklbw    m1, m4
75
+    punpcklbw    m2, m4
76
+    punpcklbw    m3, m4
77
+    mova  [r0+0x40], m0             ; output rows 4..7
78
+    mova  [r0+0x50], m1
79
+    mova  [r0+0x60], m2
80
+    mova  [r0+0x70], m3
81
+    RET
83
+
84
+INIT_MMX mmx
84
+; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
85
+;                         int stride);
86
+cglobal diff_pixels, 4,5            ; r0 = block, r1 = s1, r2 = s2, r3 = stride, r4 = scratch
87
+    movsxdifnidn r3, r3d            ; x86inc helper; presumably widens the int stride to pointer width
88
+    pxor         m7, m7             ; m7 = 0, interleaved with the bytes to zero-extend them
89
+    add          r0,  128           ; r0 -> end of the 128-byte output block
90
+    mov          r4, -128           ; negative byte offset from the end, counts up to 0
91
+.loop:
92
+    mova         m0, [r1]           ; one row of s1
93
+    mova         m2, [r2]           ; matching row of s2
94
+    mova         m1, m0             ; copy of the s1 row for its upper bytes
95
+    mova         m3, m2             ; copy of the s2 row for its upper bytes
96
+    punpcklbw    m0, m7             ; s1 lower bytes -> words
97
+    punpckhbw    m1, m7             ; s1 upper bytes -> words
98
+    punpcklbw    m2, m7             ; s2 lower bytes -> words
99
+    punpckhbw    m3, m7             ; s2 upper bytes -> words
100
+    psubw        m0, m2             ; s1 - s2, first half of the row
101
+    psubw        m1, m3             ; s1 - s2, second half of the row
102
+    mova  [r0+r4+0], m0             ; store the output row (two 8-byte halves)
103
+    mova  [r0+r4+8], m1
104
+    add          r1, r3             ; next row of s1
105
+    add          r2, r3             ; next row of s2
106
+    add          r4, 16             ; 16 output bytes were written this iteration
107
+    jne .loop                       ; repeat until the offset reaches 0 (8 passes)
108
+    REP_RET
0 110
new file mode 100644
... ...
@@ -0,0 +1,47 @@
0
+/*
1
+ * SIMD-optimized pixel operations
2
+ *
3
+ * This file is part of Libav.
4
+ *
5
+ * Libav is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * Libav is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with Libav; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#include "libavutil/attributes.h"
21
+#include "libavutil/cpu.h"
22
+#include "libavutil/x86/cpu.h"
23
+#include "libavcodec/pixblockdsp.h"
24
+
25
+void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size);
26
+void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size);
27
+void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
28
+                        int stride);
29
+
30
+av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c,
31
+                                     AVCodecContext *avctx,
32
+                                     unsigned high_bit_depth)
33
+{
34
+    int cpu_flags = av_get_cpu_flags();
35
+
36
+    if (EXTERNAL_MMX(cpu_flags)) {
37
+        if (!high_bit_depth)
38
+            c->get_pixels = ff_get_pixels_mmx;
39
+        c->diff_pixels = ff_diff_pixels_mmx;
40
+    }
41
+
42
+    if (EXTERNAL_SSE2(cpu_flags)) {
43
+        if (!high_bit_depth)
44
+            c->get_pixels = ff_get_pixels_sse2;
45
+    }
46
+}