Browse code

dsputil: Separate h264chroma

Diego Biurrun authored on 2013/01/19 11:34:47
Showing 40 changed files
... ...
@@ -1638,7 +1638,7 @@ wmv3_vdpau_decoder_select="vc1_vdpau_decoder"
1638 1638
 wmv3_vdpau_hwaccel_select="vc1_vdpau_hwaccel"
1639 1639
 
1640 1640
 # parsers
1641
-h264_parser_select="error_resilience golomb h264dsp h264pred h264qpel mpegvideo"
1641
+h264_parser_select="error_resilience golomb h264chroma h264dsp h264pred h264qpel mpegvideo"
1642 1642
 mpeg4video_parser_select="error_resilience mpegvideo"
1643 1643
 mpegvideo_parser_select="error_resilience mpegvideo"
1644 1644
 vc1_parser_select="error_resilience mpegvideo"
... ...
@@ -44,6 +44,7 @@ FFT-OBJS-$(CONFIG_HARDCODED_TABLES)    += cos_tables.o cos_fixed_tables.o
44 44
 OBJS-$(CONFIG_FFT)                     += avfft.o fft_fixed.o fft_float.o \
45 45
                                           $(FFT-OBJS-yes)
46 46
 OBJS-$(CONFIG_GOLOMB)                  += golomb.o
47
+OBJS-$(CONFIG_H264CHROMA)              += h264chroma.o
47 48
 OBJS-$(CONFIG_H264DSP)                 += h264dsp.o h264idct.o
48 49
 OBJS-$(CONFIG_H264PRED)                += h264pred.o
49 50
 OBJS-$(CONFIG_H264QPEL)                += h264qpel.o
... ...
@@ -26,6 +26,7 @@ ARMV6-OBJS-$(CONFIG_VP8_DECODER)       += arm/vp8_armv6.o               \
26 26
                                           arm/vp8dsp_init_armv6.o       \
27 27
                                           arm/vp8dsp_armv6.o
28 28
 
29
+OBJS-$(CONFIG_H264CHROMA)              += arm/h264chroma_init_arm.o
29 30
 OBJS-$(CONFIG_H264DSP)                 += arm/h264dsp_init_arm.o
30 31
 OBJS-$(CONFIG_H264PRED)                += arm/h264pred_init_arm.o
31 32
 OBJS-$(CONFIG_H264QPEL)                += arm/h264qpel_init_arm.o
... ...
@@ -67,9 +68,9 @@ NEON-OBJS-$(CONFIG_MDCT)               += arm/mdct_neon.o               \
67 67
 
68 68
 NEON-OBJS-$(CONFIG_RDFT)               += arm/rdft_neon.o               \
69 69
 
70
+NEON-OBJS-$(CONFIG_H264CHROMA)         += arm/h264cmc_neon.o
70 71
 NEON-OBJS-$(CONFIG_H264DSP)            += arm/h264dsp_neon.o            \
71 72
                                           arm/h264idct_neon.o           \
72
-                                          arm/h264cmc_neon.o            \
73 73
 
74 74
 NEON-OBJS-$(CONFIG_H264PRED)           += arm/h264pred_neon.o           \
75 75
 
... ...
@@ -87,7 +88,6 @@ NEON-OBJS-$(CONFIG_MPEGVIDEO)          += arm/mpegvideo_neon.o
87 87
 NEON-OBJS-$(CONFIG_RV30_DECODER)       += arm/rv34dsp_neon.o
88 88
 NEON-OBJS-$(CONFIG_RV40_DECODER)       += arm/rv34dsp_neon.o            \
89 89
                                           arm/rv40dsp_neon.o            \
90
-                                          arm/h264cmc_neon.o            \
91 90
 
92 91
 NEON-OBJS-$(CONFIG_VORBIS_DECODER)     += arm/vorbisdsp_neon.o
93 92
 
... ...
@@ -64,14 +64,6 @@ void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int);
64 64
 void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int);
65 65
 void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int);
66 66
 
67
-void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
68
-void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
69
-void ff_put_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
70
-
71
-void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
72
-void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
73
-void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
74
-
75 67
 void ff_vector_clipf_neon(float *dst, const float *src, float min, float max,
76 68
                           int len);
77 69
 void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min,
... ...
@@ -139,16 +131,6 @@ av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
139 139
     c->put_pixels_clamped = ff_put_pixels_clamped_neon;
140 140
     c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon;
141 141
 
142
-    if (CONFIG_H264_DECODER && !high_bit_depth) {
143
-        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon;
144
-        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon;
145
-        c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_neon;
146
-
147
-        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon;
148
-        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon;
149
-        c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_neon;
150
-    }
151
-
152 142
     c->vector_clipf               = ff_vector_clipf_neon;
153 143
     c->vector_clip_int32          = ff_vector_clip_int32_neon;
154 144
 
155 145
new file mode 100644
... ...
@@ -0,0 +1,51 @@
0
+/*
1
+ * ARM NEON optimised H.264 chroma functions
2
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
3
+ *
4
+ * This file is part of Libav.
5
+ *
6
+ * Libav is free software; you can redistribute it and/or
7
+ * modify it under the terms of the GNU Lesser General Public
8
+ * License as published by the Free Software Foundation; either
9
+ * version 2.1 of the License, or (at your option) any later version.
10
+ *
11
+ * Libav is distributed in the hope that it will be useful,
12
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
+ * Lesser General Public License for more details.
15
+ *
16
+ * You should have received a copy of the GNU Lesser General Public
17
+ * License along with Libav; if not, write to the Free Software
18
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
+ */
20
+
21
+#include <stdint.h>
22
+
23
+#include "libavutil/attributes.h"
24
+#include "libavutil/cpu.h"
25
+#include "libavutil/arm/cpu.h"
26
+#include "libavcodec/h264chroma.h"
27
+
28
+void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
29
+void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
30
+void ff_put_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
31
+
32
+void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
33
+void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
34
+void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
35
+
36
+av_cold void ff_h264chroma_init_arm(H264ChromaContext *c, int bit_depth)
37
+{
38
+    const int high_bit_depth = bit_depth > 8;
39
+    int cpu_flags = av_get_cpu_flags();
40
+
41
+    if (have_neon(cpu_flags) && !high_bit_depth) {
42
+        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon;
43
+        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon;
44
+        c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_neon;
45
+
46
+        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon;
47
+        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon;
48
+        c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_neon;
49
+    }
50
+}
... ...
@@ -28,6 +28,7 @@
28 28
 #include "avcodec.h"
29 29
 #include "get_bits.h"
30 30
 #include "golomb.h"
31
+#include "h264chroma.h"
31 32
 #include "mathops.h"
32 33
 #include "cavs.h"
33 34
 
... ...
@@ -464,30 +465,35 @@ void ff_cavs_inter(AVSContext *h, enum cavs_mb mb_type) {
464 464
     if(ff_cavs_partition_flags[mb_type] == 0){ // 16x16
465 465
         mc_part_std(h, 8, 0, h->cy, h->cu, h->cv, 0, 0,
466 466
                 h->cdsp.put_cavs_qpel_pixels_tab[0],
467
-                h->dsp.put_h264_chroma_pixels_tab[0],
467
+                h->h264chroma.put_h264_chroma_pixels_tab[0],
468 468
                 h->cdsp.avg_cavs_qpel_pixels_tab[0],
469
-                h->dsp.avg_h264_chroma_pixels_tab[0],&h->mv[MV_FWD_X0]);
469
+                h->h264chroma.avg_h264_chroma_pixels_tab[0],
470
+                &h->mv[MV_FWD_X0]);
470 471
     }else{
471 472
         mc_part_std(h, 4, 0, h->cy, h->cu, h->cv, 0, 0,
472 473
                 h->cdsp.put_cavs_qpel_pixels_tab[1],
473
-                h->dsp.put_h264_chroma_pixels_tab[1],
474
+                h->h264chroma.put_h264_chroma_pixels_tab[1],
474 475
                 h->cdsp.avg_cavs_qpel_pixels_tab[1],
475
-                h->dsp.avg_h264_chroma_pixels_tab[1],&h->mv[MV_FWD_X0]);
476
+                h->h264chroma.avg_h264_chroma_pixels_tab[1],
477
+                &h->mv[MV_FWD_X0]);
476 478
         mc_part_std(h, 4, 0, h->cy, h->cu, h->cv, 4, 0,
477 479
                 h->cdsp.put_cavs_qpel_pixels_tab[1],
478
-                h->dsp.put_h264_chroma_pixels_tab[1],
480
+                h->h264chroma.put_h264_chroma_pixels_tab[1],
479 481
                 h->cdsp.avg_cavs_qpel_pixels_tab[1],
480
-                h->dsp.avg_h264_chroma_pixels_tab[1],&h->mv[MV_FWD_X1]);
482
+                h->h264chroma.avg_h264_chroma_pixels_tab[1],
483
+                &h->mv[MV_FWD_X1]);
481 484
         mc_part_std(h, 4, 0, h->cy, h->cu, h->cv, 0, 4,
482 485
                 h->cdsp.put_cavs_qpel_pixels_tab[1],
483
-                h->dsp.put_h264_chroma_pixels_tab[1],
486
+                h->h264chroma.put_h264_chroma_pixels_tab[1],
484 487
                 h->cdsp.avg_cavs_qpel_pixels_tab[1],
485
-                h->dsp.avg_h264_chroma_pixels_tab[1],&h->mv[MV_FWD_X2]);
488
+                h->h264chroma.avg_h264_chroma_pixels_tab[1],
489
+                &h->mv[MV_FWD_X2]);
486 490
         mc_part_std(h, 4, 0, h->cy, h->cu, h->cv, 4, 4,
487 491
                 h->cdsp.put_cavs_qpel_pixels_tab[1],
488
-                h->dsp.put_h264_chroma_pixels_tab[1],
492
+                h->h264chroma.put_h264_chroma_pixels_tab[1],
489 493
                 h->cdsp.avg_cavs_qpel_pixels_tab[1],
490
-                h->dsp.avg_h264_chroma_pixels_tab[1],&h->mv[MV_FWD_X3]);
494
+                h->h264chroma.avg_h264_chroma_pixels_tab[1],
495
+                &h->mv[MV_FWD_X3]);
491 496
     }
492 497
 }
493 498
 
... ...
@@ -720,6 +726,7 @@ av_cold int ff_cavs_init(AVCodecContext *avctx) {
720 720
     AVSContext *h = avctx->priv_data;
721 721
 
722 722
     ff_dsputil_init(&h->dsp, avctx);
723
+    ff_h264chroma_init(&h->h264chroma, 8);
723 724
     ff_videodsp_init(&h->vdsp, 8);
724 725
     ff_cavsdsp_init(&h->cdsp, avctx);
725 726
     ff_init_scantable_permutation(h->dsp.idct_permutation,
... ...
@@ -24,6 +24,7 @@
24 24
 
25 25
 #include "cavsdsp.h"
26 26
 #include "dsputil.h"
27
+#include "h264chroma.h"
27 28
 #include "get_bits.h"
28 29
 #include "videodsp.h"
29 30
 
... ...
@@ -161,6 +162,7 @@ typedef struct AVSFrame {
161 161
 typedef struct AVSContext {
162 162
     AVCodecContext *avctx;
163 163
     DSPContext       dsp;
164
+    H264ChromaContext h264chroma;
164 165
     VideoDSPContext vdsp;
165 166
     CAVSDSPContext  cdsp;
166 167
     GetBitContext gb;
... ...
@@ -2719,13 +2719,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
2719 2719
     c->clear_blocks                  = FUNCC(clear_blocks ## dct   , depth);\
2720 2720
     c->add_pixels8                   = FUNCC(add_pixels8  ## dct   , depth);\
2721 2721
     c->add_pixels4                   = FUNCC(add_pixels4  ## dct   , depth);\
2722
-\
2723
-    c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8   , depth);\
2724
-    c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4   , depth);\
2725
-    c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2   , depth);\
2726
-    c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8   , depth);\
2727
-    c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4   , depth);\
2728
-    c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2   , depth)
2729 2722
 
2730 2723
     switch (avctx->bits_per_raw_sample) {
2731 2724
     case 9:
... ...
@@ -140,7 +140,6 @@ void clear_blocks_c(int16_t *blocks);
140 140
 typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, ptrdiff_t line_size, int h);
141 141
 typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h);
142 142
 typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
143
-typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
144 143
 
145 144
 typedef void (*op_fill_func)(uint8_t *block/*align width (8 or 16)*/, uint8_t value, int line_size, int h);
146 145
 
... ...
@@ -308,12 +307,6 @@ typedef struct DSPContext {
308 308
     qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
309 309
     qpel_mc_func put_mspel_pixels_tab[8];
310 310
 
311
-    /**
312
-     * h264 Chroma MC
313
-     */
314
-    h264_chroma_mc_func put_h264_chroma_pixels_tab[3];
315
-    h264_chroma_mc_func avg_h264_chroma_pixels_tab[3];
316
-
317 311
     me_cmp_func pix_abs[2][4];
318 312
 
319 313
     /* huffyuv specific */
... ...
@@ -463,124 +463,6 @@ PIXOP2(put, op_put)
463 463
 #undef op_avg
464 464
 #undef op_put
465 465
 
466
-#define H264_CHROMA_MC(OPNAME, OP)\
467
-static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
468
-    pixel *dst = (pixel*)_dst;\
469
-    pixel *src = (pixel*)_src;\
470
-    const int A=(8-x)*(8-y);\
471
-    const int B=(  x)*(8-y);\
472
-    const int C=(8-x)*(  y);\
473
-    const int D=(  x)*(  y);\
474
-    int i;\
475
-    stride /= sizeof(pixel);\
476
-    \
477
-    assert(x<8 && y<8 && x>=0 && y>=0);\
478
-\
479
-    if(D){\
480
-        for(i=0; i<h; i++){\
481
-            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
482
-            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
483
-            dst+= stride;\
484
-            src+= stride;\
485
-        }\
486
-    }else{\
487
-        const int E= B+C;\
488
-        const int step= C ? stride : 1;\
489
-        for(i=0; i<h; i++){\
490
-            OP(dst[0], (A*src[0] + E*src[step+0]));\
491
-            OP(dst[1], (A*src[1] + E*src[step+1]));\
492
-            dst+= stride;\
493
-            src+= stride;\
494
-        }\
495
-    }\
496
-}\
497
-\
498
-static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
499
-    pixel *dst = (pixel*)_dst;\
500
-    pixel *src = (pixel*)_src;\
501
-    const int A=(8-x)*(8-y);\
502
-    const int B=(  x)*(8-y);\
503
-    const int C=(8-x)*(  y);\
504
-    const int D=(  x)*(  y);\
505
-    int i;\
506
-    stride /= sizeof(pixel);\
507
-    \
508
-    assert(x<8 && y<8 && x>=0 && y>=0);\
509
-\
510
-    if(D){\
511
-        for(i=0; i<h; i++){\
512
-            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
513
-            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
514
-            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
515
-            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
516
-            dst+= stride;\
517
-            src+= stride;\
518
-        }\
519
-    }else{\
520
-        const int E= B+C;\
521
-        const int step= C ? stride : 1;\
522
-        for(i=0; i<h; i++){\
523
-            OP(dst[0], (A*src[0] + E*src[step+0]));\
524
-            OP(dst[1], (A*src[1] + E*src[step+1]));\
525
-            OP(dst[2], (A*src[2] + E*src[step+2]));\
526
-            OP(dst[3], (A*src[3] + E*src[step+3]));\
527
-            dst+= stride;\
528
-            src+= stride;\
529
-        }\
530
-    }\
531
-}\
532
-\
533
-static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
534
-    pixel *dst = (pixel*)_dst;\
535
-    pixel *src = (pixel*)_src;\
536
-    const int A=(8-x)*(8-y);\
537
-    const int B=(  x)*(8-y);\
538
-    const int C=(8-x)*(  y);\
539
-    const int D=(  x)*(  y);\
540
-    int i;\
541
-    stride /= sizeof(pixel);\
542
-    \
543
-    assert(x<8 && y<8 && x>=0 && y>=0);\
544
-\
545
-    if(D){\
546
-        for(i=0; i<h; i++){\
547
-            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
548
-            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
549
-            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
550
-            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
551
-            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
552
-            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
553
-            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
554
-            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
555
-            dst+= stride;\
556
-            src+= stride;\
557
-        }\
558
-    }else{\
559
-        const int E= B+C;\
560
-        const int step= C ? stride : 1;\
561
-        for(i=0; i<h; i++){\
562
-            OP(dst[0], (A*src[0] + E*src[step+0]));\
563
-            OP(dst[1], (A*src[1] + E*src[step+1]));\
564
-            OP(dst[2], (A*src[2] + E*src[step+2]));\
565
-            OP(dst[3], (A*src[3] + E*src[step+3]));\
566
-            OP(dst[4], (A*src[4] + E*src[step+4]));\
567
-            OP(dst[5], (A*src[5] + E*src[step+5]));\
568
-            OP(dst[6], (A*src[6] + E*src[step+6]));\
569
-            OP(dst[7], (A*src[7] + E*src[step+7]));\
570
-            dst+= stride;\
571
-            src+= stride;\
572
-        }\
573
-    }\
574
-}
575
-
576
-#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
577
-#define op_put(a, b) a = (((b) + 32)>>6)
578
-
579
-H264_CHROMA_MC(put_       , op_put)
580
-H264_CHROMA_MC(avg_       , op_avg)
581
-#undef op_avg
582
-#undef op_put
583
-
584 466
 void FUNCC(ff_put_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
585 467
     FUNCC(put_pixels8)(dst, src, stride, 8);
586 468
 }
... ...
@@ -34,6 +34,7 @@
34 34
 #include "mpegvideo.h"
35 35
 #include "h264.h"
36 36
 #include "h264data.h"
37
+#include "h264chroma.h"
37 38
 #include "h264_mvpred.h"
38 39
 #include "golomb.h"
39 40
 #include "mathops.h"
... ...
@@ -976,6 +977,7 @@ static av_cold void common_init(H264Context *h)
976 976
     s->codec_id = s->avctx->codec->id;
977 977
 
978 978
     ff_h264dsp_init(&h->h264dsp, 8, 1);
979
+    ff_h264chroma_init(&h->h264chroma, h->sps.bit_depth_chroma);
979 980
     ff_h264qpel_init(&h->h264qpel, 8);
980 981
     ff_h264_pred_init(&h->hpc, s->codec_id, 8, 1);
981 982
 
... ...
@@ -2445,6 +2447,7 @@ static int h264_set_parameter_from_sps(H264Context *h)
2445 2445
 
2446 2446
             ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma,
2447 2447
                             h->sps.chroma_format_idc);
2448
+            ff_h264chroma_init(&h->h264chroma, h->sps.bit_depth_chroma);
2448 2449
             ff_h264qpel_init(&h->h264qpel, h->sps.bit_depth_luma);
2449 2450
             ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma,
2450 2451
                               h->sps.chroma_format_idc);
... ...
@@ -31,6 +31,7 @@
31 31
 #include "libavutil/intreadwrite.h"
32 32
 #include "cabac.h"
33 33
 #include "mpegvideo.h"
34
+#include "h264chroma.h"
34 35
 #include "h264dsp.h"
35 36
 #include "h264pred.h"
36 37
 #include "h264qpel.h"
... ...
@@ -254,6 +255,7 @@ typedef struct MMCO {
254 254
 typedef struct H264Context {
255 255
     MpegEncContext s;
256 256
     H264DSPContext h264dsp;
257
+    H264ChromaContext h264chroma;
257 258
     H264QpelContext h264qpel;
258 259
     int pixel_shift;    ///< 0 for 8-bit H264, 1 for high-bit-depth H264
259 260
     int chroma_qp[2];   // QPc
... ...
@@ -176,14 +176,14 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h)
176 176
         } else if (is_h264) {
177 177
             if (chroma422) {
178 178
                 FUNC(hl_motion_422)(h, dest_y, dest_cb, dest_cr,
179
-                              s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
180
-                              s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
179
+                              s->me.qpel_put, h->h264chroma.put_h264_chroma_pixels_tab,
180
+                              s->me.qpel_avg, h->h264chroma.avg_h264_chroma_pixels_tab,
181 181
                               h->h264dsp.weight_h264_pixels_tab,
182 182
                               h->h264dsp.biweight_h264_pixels_tab);
183 183
             } else {
184 184
                 FUNC(hl_motion_420)(h, dest_y, dest_cb, dest_cr,
185
-                              s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
186
-                              s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
185
+                              s->me.qpel_put, h->h264chroma.put_h264_chroma_pixels_tab,
186
+                              s->me.qpel_avg, h->h264chroma.avg_h264_chroma_pixels_tab,
187 187
                               h->h264dsp.weight_h264_pixels_tab,
188 188
                               h->h264dsp.biweight_h264_pixels_tab);
189 189
             }
... ...
@@ -360,8 +360,8 @@ static av_noinline void FUNC(hl_decode_mb_444)(H264Context *h)
360 360
                                linesize, 0, 1, SIMPLE, PIXEL_SHIFT);
361 361
         } else {
362 362
             FUNC(hl_motion_444)(h, dest[0], dest[1], dest[2],
363
-                      s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
364
-                      s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
363
+                      s->me.qpel_put, h->h264chroma.put_h264_chroma_pixels_tab,
364
+                      s->me.qpel_avg, h->h264chroma.avg_h264_chroma_pixels_tab,
365 365
                       h->h264dsp.weight_h264_pixels_tab,
366 366
                       h->h264dsp.biweight_h264_pixels_tab);
367 367
         }
368 368
new file mode 100644
... ...
@@ -0,0 +1,64 @@
0
+/*
1
+ * This file is part of Libav.
2
+ *
3
+ * Libav is free software; you can redistribute it and/or
4
+ * modify it under the terms of the GNU Lesser General Public
5
+ * License as published by the Free Software Foundation; either
6
+ * version 2.1 of the License, or (at your option) any later version.
7
+ *
8
+ * Libav is distributed in the hope that it will be useful,
9
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
+ * Lesser General Public License for more details.
12
+ *
13
+ * You should have received a copy of the GNU Lesser General Public
14
+ * License along with Libav; if not, write to the Free Software
15
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+ */
17
+
18
+#include "config.h"
19
+#include "h264chroma.h"
20
+
21
+#define BIT_DEPTH 8
22
+#include "h264chroma_template.c"
23
+#undef BIT_DEPTH
24
+
25
+#define BIT_DEPTH 9
26
+#include "h264chroma_template.c"
27
+#undef BIT_DEPTH
28
+
29
+#define BIT_DEPTH 10
30
+#include "h264chroma_template.c"
31
+#undef BIT_DEPTH
32
+
33
+#define SET_CHROMA(depth)                                                   \
34
+    c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_ ## depth ## _c; \
35
+    c->put_h264_chroma_pixels_tab[1] = put_h264_chroma_mc4_ ## depth ## _c; \
36
+    c->put_h264_chroma_pixels_tab[2] = put_h264_chroma_mc2_ ## depth ## _c; \
37
+    c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_ ## depth ## _c; \
38
+    c->avg_h264_chroma_pixels_tab[1] = avg_h264_chroma_mc4_ ## depth ## _c; \
39
+    c->avg_h264_chroma_pixels_tab[2] = avg_h264_chroma_mc2_ ## depth ## _c; \
40
+
41
+void ff_h264chroma_init(H264ChromaContext *c, int bit_depth)
42
+{
43
+    switch (bit_depth) {
44
+    case 10:
45
+        SET_CHROMA(10);
46
+        break;
47
+    case 9:
48
+        SET_CHROMA(9);
49
+        break;
50
+    default:
51
+        SET_CHROMA(8);
52
+        break;
53
+    }
54
+
55
+    if (ARCH_ARM)
56
+        ff_h264chroma_init_arm(c, bit_depth);
57
+    if (ARCH_PPC)
58
+        ff_h264chroma_init_ppc(c, bit_depth);
59
+    if (ARCH_SH4)
60
+        ff_h264chroma_init_sh4(c, bit_depth);
61
+    if (ARCH_X86)
62
+        ff_h264chroma_init_x86(c, bit_depth);
63
+}
0 64
new file mode 100644
... ...
@@ -0,0 +1,38 @@
0
+/*
1
+ * This file is part of Libav.
2
+ *
3
+ * Libav is free software; you can redistribute it and/or
4
+ * modify it under the terms of the GNU Lesser General Public
5
+ * License as published by the Free Software Foundation; either
6
+ * version 2.1 of the License, or (at your option) any later version.
7
+ *
8
+ * Libav is distributed in the hope that it will be useful,
9
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
+ * Lesser General Public License for more details.
12
+ *
13
+ * You should have received a copy of the GNU Lesser General Public
14
+ * License along with Libav; if not, write to the Free Software
15
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+ */
17
+
18
+#ifndef AVCODEC_H264CHROMA_H
19
+#define AVCODEC_H264CHROMA_H
20
+
21
+#include <stdint.h>
22
+
23
+typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
24
+
25
+typedef struct H264ChromaContext {
26
+    h264_chroma_mc_func put_h264_chroma_pixels_tab[3];
27
+    h264_chroma_mc_func avg_h264_chroma_pixels_tab[3];
28
+} H264ChromaContext;
29
+
30
+void ff_h264chroma_init(H264ChromaContext *c, int bit_depth);
31
+
32
+void ff_h264chroma_init_arm(H264ChromaContext *c, int bit_depth);
33
+void ff_h264chroma_init_ppc(H264ChromaContext *c, int bit_depth);
34
+void ff_h264chroma_init_sh4(H264ChromaContext *c, int bit_depth);
35
+void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth);
36
+
37
+#endif /* AVCODEC_H264CHROMA_H */
0 38
new file mode 100644
... ...
@@ -0,0 +1,142 @@
0
+/*
1
+ * Copyright (c) 2000, 2001 Fabrice Bellard
2
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
3
+ *
4
+ * This file is part of Libav.
5
+ *
6
+ * Libav is free software; you can redistribute it and/or
7
+ * modify it under the terms of the GNU Lesser General Public
8
+ * License as published by the Free Software Foundation; either
9
+ * version 2.1 of the License, or (at your option) any later version.
10
+ *
11
+ * Libav is distributed in the hope that it will be useful,
12
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
+ * Lesser General Public License for more details.
15
+ *
16
+ * You should have received a copy of the GNU Lesser General Public
17
+ * License along with Libav; if not, write to the Free Software
18
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
+ */
20
+
21
+#include <assert.h>
22
+
23
+#include "bit_depth_template.c"
24
+
25
+#define H264_CHROMA_MC(OPNAME, OP)\
26
+static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
27
+    pixel *dst = (pixel*)_dst;\
28
+    pixel *src = (pixel*)_src;\
29
+    const int A=(8-x)*(8-y);\
30
+    const int B=(  x)*(8-y);\
31
+    const int C=(8-x)*(  y);\
32
+    const int D=(  x)*(  y);\
33
+    int i;\
34
+    stride /= sizeof(pixel);\
35
+    \
36
+    assert(x<8 && y<8 && x>=0 && y>=0);\
37
+\
38
+    if(D){\
39
+        for(i=0; i<h; i++){\
40
+            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
41
+            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
42
+            dst+= stride;\
43
+            src+= stride;\
44
+        }\
45
+    }else{\
46
+        const int E= B+C;\
47
+        const int step= C ? stride : 1;\
48
+        for(i=0; i<h; i++){\
49
+            OP(dst[0], (A*src[0] + E*src[step+0]));\
50
+            OP(dst[1], (A*src[1] + E*src[step+1]));\
51
+            dst+= stride;\
52
+            src+= stride;\
53
+        }\
54
+    }\
55
+}\
56
+\
57
+static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
58
+    pixel *dst = (pixel*)_dst;\
59
+    pixel *src = (pixel*)_src;\
60
+    const int A=(8-x)*(8-y);\
61
+    const int B=(  x)*(8-y);\
62
+    const int C=(8-x)*(  y);\
63
+    const int D=(  x)*(  y);\
64
+    int i;\
65
+    stride /= sizeof(pixel);\
66
+    \
67
+    assert(x<8 && y<8 && x>=0 && y>=0);\
68
+\
69
+    if(D){\
70
+        for(i=0; i<h; i++){\
71
+            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
72
+            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
73
+            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
74
+            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
75
+            dst+= stride;\
76
+            src+= stride;\
77
+        }\
78
+    }else{\
79
+        const int E= B+C;\
80
+        const int step= C ? stride : 1;\
81
+        for(i=0; i<h; i++){\
82
+            OP(dst[0], (A*src[0] + E*src[step+0]));\
83
+            OP(dst[1], (A*src[1] + E*src[step+1]));\
84
+            OP(dst[2], (A*src[2] + E*src[step+2]));\
85
+            OP(dst[3], (A*src[3] + E*src[step+3]));\
86
+            dst+= stride;\
87
+            src+= stride;\
88
+        }\
89
+    }\
90
+}\
91
+\
92
+static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
93
+    pixel *dst = (pixel*)_dst;\
94
+    pixel *src = (pixel*)_src;\
95
+    const int A=(8-x)*(8-y);\
96
+    const int B=(  x)*(8-y);\
97
+    const int C=(8-x)*(  y);\
98
+    const int D=(  x)*(  y);\
99
+    int i;\
100
+    stride /= sizeof(pixel);\
101
+    \
102
+    assert(x<8 && y<8 && x>=0 && y>=0);\
103
+\
104
+    if(D){\
105
+        for(i=0; i<h; i++){\
106
+            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
107
+            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
108
+            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
109
+            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
110
+            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
111
+            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
112
+            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
113
+            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
114
+            dst+= stride;\
115
+            src+= stride;\
116
+        }\
117
+    }else{\
118
+        const int E= B+C;\
119
+        const int step= C ? stride : 1;\
120
+        for(i=0; i<h; i++){\
121
+            OP(dst[0], (A*src[0] + E*src[step+0]));\
122
+            OP(dst[1], (A*src[1] + E*src[step+1]));\
123
+            OP(dst[2], (A*src[2] + E*src[step+2]));\
124
+            OP(dst[3], (A*src[3] + E*src[step+3]));\
125
+            OP(dst[4], (A*src[4] + E*src[step+4]));\
126
+            OP(dst[5], (A*src[5] + E*src[step+5]));\
127
+            OP(dst[6], (A*src[6] + E*src[step+6]));\
128
+            OP(dst[7], (A*src[7] + E*src[step+7]));\
129
+            dst+= stride;\
130
+            src+= stride;\
131
+        }\
132
+    }\
133
+}
134
+
135
+#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
136
+#define op_put(a, b) a = (((b) + 32)>>6)
137
+
138
+H264_CHROMA_MC(put_       , op_put)
139
+H264_CHROMA_MC(avg_       , op_avg)
140
+#undef op_avg
141
+#undef op_put
... ...
@@ -1,6 +1,7 @@
1 1
 OBJS                                   += ppc/dsputil_ppc.o             \
2 2
                                           ppc/videodsp_ppc.o            \
3 3
 
4
+OBJS-$(CONFIG_H264CHROMA)              += ppc/h264chroma_init.o
4 5
 OBJS-$(CONFIG_H264QPEL)                += ppc/h264_qpel.o
5 6
 OBJS-$(CONFIG_VORBIS_DECODER)          += ppc/vorbisdsp_altivec.o
6 7
 OBJS-$(CONFIG_VP3DSP)                  += ppc/vp3dsp_altivec.o
... ...
@@ -36,8 +36,6 @@ void ff_gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h,
36 36
 void ff_idct_put_altivec(uint8_t *dest, int line_size, int16_t *block);
37 37
 void ff_idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);
38 38
 
39
-void ff_dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx);
40
-
41 39
 void ff_dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx);
42 40
 void ff_float_init_altivec(DSPContext* c, AVCodecContext *avctx);
43 41
 void ff_int_init_altivec(DSPContext* c, AVCodecContext *avctx);
... ...
@@ -157,8 +157,6 @@ av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx)
157 157
     }
158 158
 
159 159
 #if HAVE_ALTIVEC
160
-    if(CONFIG_H264_DECODER) ff_dsputil_h264_init_ppc(c, avctx);
161
-
162 160
     if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
163 161
         ff_dsputil_init_altivec(c, avctx);
164 162
         ff_int_init_altivec(c, avctx);
... ...
@@ -33,8 +33,6 @@
33 33
 #define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)
34 34
 
35 35
 #define OP_U8_ALTIVEC                          PUT_OP_U8_ALTIVEC
36
-#define PREFIX_h264_chroma_mc8_altivec         put_h264_chroma_mc8_altivec
37
-#define PREFIX_h264_chroma_mc8_num             altivec_put_h264_chroma_mc8_num
38 36
 #define PREFIX_h264_qpel16_h_lowpass_altivec   put_h264_qpel16_h_lowpass_altivec
39 37
 #define PREFIX_h264_qpel16_h_lowpass_num       altivec_put_h264_qpel16_h_lowpass_num
40 38
 #define PREFIX_h264_qpel16_v_lowpass_altivec   put_h264_qpel16_v_lowpass_altivec
... ...
@@ -43,8 +41,6 @@
43 43
 #define PREFIX_h264_qpel16_hv_lowpass_num      altivec_put_h264_qpel16_hv_lowpass_num
44 44
 #include "h264_qpel_template.c"
45 45
 #undef OP_U8_ALTIVEC
46
-#undef PREFIX_h264_chroma_mc8_altivec
47
-#undef PREFIX_h264_chroma_mc8_num
48 46
 #undef PREFIX_h264_qpel16_h_lowpass_altivec
49 47
 #undef PREFIX_h264_qpel16_h_lowpass_num
50 48
 #undef PREFIX_h264_qpel16_v_lowpass_altivec
... ...
@@ -53,8 +49,6 @@
53 53
 #undef PREFIX_h264_qpel16_hv_lowpass_num
54 54
 
55 55
 #define OP_U8_ALTIVEC                          AVG_OP_U8_ALTIVEC
56
-#define PREFIX_h264_chroma_mc8_altivec         avg_h264_chroma_mc8_altivec
57
-#define PREFIX_h264_chroma_mc8_num             altivec_avg_h264_chroma_mc8_num
58 56
 #define PREFIX_h264_qpel16_h_lowpass_altivec   avg_h264_qpel16_h_lowpass_altivec
59 57
 #define PREFIX_h264_qpel16_h_lowpass_num       altivec_avg_h264_qpel16_h_lowpass_num
60 58
 #define PREFIX_h264_qpel16_v_lowpass_altivec   avg_h264_qpel16_v_lowpass_altivec
... ...
@@ -63,8 +57,6 @@
63 63
 #define PREFIX_h264_qpel16_hv_lowpass_num      altivec_avg_h264_qpel16_hv_lowpass_num
64 64
 #include "h264_qpel_template.c"
65 65
 #undef OP_U8_ALTIVEC
66
-#undef PREFIX_h264_chroma_mc8_altivec
67
-#undef PREFIX_h264_chroma_mc8_num
68 66
 #undef PREFIX_h264_qpel16_h_lowpass_altivec
69 67
 #undef PREFIX_h264_qpel16_h_lowpass_num
70 68
 #undef PREFIX_h264_qpel16_v_lowpass_altivec
... ...
@@ -273,18 +265,6 @@ static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
273 273
 
274 274
 H264_MC(put_, 16, altivec)
275 275
 H264_MC(avg_, 16, altivec)
276
-
277
-void ff_dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx)
278
-{
279
-    const int high_bit_depth = avctx->bits_per_raw_sample > 8;
280
-
281
-    if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
282
-    if (!high_bit_depth) {
283
-        c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec;
284
-        c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec;
285
-    }
286
-    }
287
-}
288 276
 #endif /* HAVE_ALTIVEC */
289 277
 
290 278
 av_cold void ff_h264qpel_init_ppc(H264QpelContext *c, int bit_depth)
... ...
@@ -26,274 +26,6 @@
26 26
 #define ASSERT_ALIGNED(ptr) ;
27 27
 #endif
28 28
 
29
-/* this code assume that stride % 16 == 0 */
30
-
31
-#define CHROMA_MC8_ALTIVEC_CORE(BIAS1, BIAS2) \
32
-        vsrc2ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc2uc);\
33
-        vsrc3ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc3uc);\
34
-\
35
-        psum = vec_mladd(vA, vsrc0ssH, BIAS1);\
36
-        psum = vec_mladd(vB, vsrc1ssH, psum);\
37
-        psum = vec_mladd(vC, vsrc2ssH, psum);\
38
-        psum = vec_mladd(vD, vsrc3ssH, psum);\
39
-        psum = BIAS2(psum);\
40
-        psum = vec_sr(psum, v6us);\
41
-\
42
-        vdst = vec_ld(0, dst);\
43
-        ppsum = (vec_u8)vec_pack(psum, psum);\
44
-        vfdst = vec_perm(vdst, ppsum, fperm);\
45
-\
46
-        OP_U8_ALTIVEC(fsum, vfdst, vdst);\
47
-\
48
-        vec_st(fsum, 0, dst);\
49
-\
50
-        vsrc0ssH = vsrc2ssH;\
51
-        vsrc1ssH = vsrc3ssH;\
52
-\
53
-        dst += stride;\
54
-        src += stride;
55
-
56
-#define CHROMA_MC8_ALTIVEC_CORE_SIMPLE \
57
-\
58
-        vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc);\
59
-        vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc);\
60
-\
61
-        psum = vec_mladd(vA, vsrc0ssH, v32ss);\
62
-        psum = vec_mladd(vE, vsrc1ssH, psum);\
63
-        psum = vec_sr(psum, v6us);\
64
-\
65
-        vdst = vec_ld(0, dst);\
66
-        ppsum = (vec_u8)vec_pack(psum, psum);\
67
-        vfdst = vec_perm(vdst, ppsum, fperm);\
68
-\
69
-        OP_U8_ALTIVEC(fsum, vfdst, vdst);\
70
-\
71
-        vec_st(fsum, 0, dst);\
72
-\
73
-        dst += stride;\
74
-        src += stride;
75
-
76
-#define noop(a) a
77
-#define add28(a) vec_add(v28ss, a)
78
-
79
-#ifdef PREFIX_h264_chroma_mc8_altivec
80
-static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,
81
-                                    int stride, int h, int x, int y) {
82
-    DECLARE_ALIGNED(16, signed int, ABCD)[4] =
83
-                        {((8 - x) * (8 - y)),
84
-                         ((    x) * (8 - y)),
85
-                         ((8 - x) * (    y)),
86
-                         ((    x) * (    y))};
87
-    register int i;
88
-    vec_u8 fperm;
89
-    const vec_s32 vABCD = vec_ld(0, ABCD);
90
-    const vec_s16 vA = vec_splat((vec_s16)vABCD, 1);
91
-    const vec_s16 vB = vec_splat((vec_s16)vABCD, 3);
92
-    const vec_s16 vC = vec_splat((vec_s16)vABCD, 5);
93
-    const vec_s16 vD = vec_splat((vec_s16)vABCD, 7);
94
-    LOAD_ZERO;
95
-    const vec_s16 v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5));
96
-    const vec_u16 v6us = vec_splat_u16(6);
97
-    register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
98
-    register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
99
-
100
-    vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1;
101
-    vec_u8 vsrc0uc, vsrc1uc;
102
-    vec_s16 vsrc0ssH, vsrc1ssH;
103
-    vec_u8 vsrcCuc, vsrc2uc, vsrc3uc;
104
-    vec_s16 vsrc2ssH, vsrc3ssH, psum;
105
-    vec_u8 vdst, ppsum, vfdst, fsum;
106
-
107
-    if (((unsigned long)dst) % 16 == 0) {
108
-        fperm = (vec_u8){0x10, 0x11, 0x12, 0x13,
109
-                         0x14, 0x15, 0x16, 0x17,
110
-                         0x08, 0x09, 0x0A, 0x0B,
111
-                         0x0C, 0x0D, 0x0E, 0x0F};
112
-    } else {
113
-        fperm = (vec_u8){0x00, 0x01, 0x02, 0x03,
114
-                         0x04, 0x05, 0x06, 0x07,
115
-                         0x18, 0x19, 0x1A, 0x1B,
116
-                         0x1C, 0x1D, 0x1E, 0x1F};
117
-    }
118
-
119
-    vsrcAuc = vec_ld(0, src);
120
-
121
-    if (loadSecond)
122
-        vsrcBuc = vec_ld(16, src);
123
-    vsrcperm0 = vec_lvsl(0, src);
124
-    vsrcperm1 = vec_lvsl(1, src);
125
-
126
-    vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
127
-    if (reallyBadAlign)
128
-        vsrc1uc = vsrcBuc;
129
-    else
130
-        vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);
131
-
132
-    vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc);
133
-    vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc);
134
-
135
-    if (ABCD[3]) {
136
-        if (!loadSecond) {// -> !reallyBadAlign
137
-            for (i = 0 ; i < h ; i++) {
138
-                vsrcCuc = vec_ld(stride + 0, src);
139
-                vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
140
-                vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
141
-
142
-                CHROMA_MC8_ALTIVEC_CORE(v32ss, noop)
143
-            }
144
-        } else {
145
-            vec_u8 vsrcDuc;
146
-            for (i = 0 ; i < h ; i++) {
147
-                vsrcCuc = vec_ld(stride + 0, src);
148
-                vsrcDuc = vec_ld(stride + 16, src);
149
-                vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
150
-                if (reallyBadAlign)
151
-                    vsrc3uc = vsrcDuc;
152
-                else
153
-                    vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
154
-
155
-                CHROMA_MC8_ALTIVEC_CORE(v32ss, noop)
156
-            }
157
-        }
158
-    } else {
159
-        const vec_s16 vE = vec_add(vB, vC);
160
-        if (ABCD[2]) { // x == 0 B == 0
161
-            if (!loadSecond) {// -> !reallyBadAlign
162
-                for (i = 0 ; i < h ; i++) {
163
-                    vsrcCuc = vec_ld(stride + 0, src);
164
-                    vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
165
-                    CHROMA_MC8_ALTIVEC_CORE_SIMPLE
166
-
167
-                    vsrc0uc = vsrc1uc;
168
-                }
169
-            } else {
170
-                vec_u8 vsrcDuc;
171
-                for (i = 0 ; i < h ; i++) {
172
-                    vsrcCuc = vec_ld(stride + 0, src);
173
-                    vsrcDuc = vec_ld(stride + 15, src);
174
-                    vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
175
-                    CHROMA_MC8_ALTIVEC_CORE_SIMPLE
176
-
177
-                    vsrc0uc = vsrc1uc;
178
-                }
179
-            }
180
-        } else { // y == 0 C == 0
181
-            if (!loadSecond) {// -> !reallyBadAlign
182
-                for (i = 0 ; i < h ; i++) {
183
-                    vsrcCuc = vec_ld(0, src);
184
-                    vsrc0uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
185
-                    vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
186
-
187
-                    CHROMA_MC8_ALTIVEC_CORE_SIMPLE
188
-                }
189
-            } else {
190
-                vec_u8 vsrcDuc;
191
-                for (i = 0 ; i < h ; i++) {
192
-                    vsrcCuc = vec_ld(0, src);
193
-                    vsrcDuc = vec_ld(15, src);
194
-                    vsrc0uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
195
-                    if (reallyBadAlign)
196
-                        vsrc1uc = vsrcDuc;
197
-                    else
198
-                        vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
199
-
200
-                    CHROMA_MC8_ALTIVEC_CORE_SIMPLE
201
-                }
202
-            }
203
-        }
204
-    }
205
-}
206
-#endif
207
-
208
-/* this code assume that stride % 16 == 0 */
209
-#ifdef PREFIX_no_rnd_vc1_chroma_mc8_altivec
210
-static void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
211
-   DECLARE_ALIGNED(16, signed int, ABCD)[4] =
212
-                        {((8 - x) * (8 - y)),
213
-                         ((    x) * (8 - y)),
214
-                         ((8 - x) * (    y)),
215
-                         ((    x) * (    y))};
216
-    register int i;
217
-    vec_u8 fperm;
218
-    const vec_s32 vABCD = vec_ld(0, ABCD);
219
-    const vec_s16 vA = vec_splat((vec_s16)vABCD, 1);
220
-    const vec_s16 vB = vec_splat((vec_s16)vABCD, 3);
221
-    const vec_s16 vC = vec_splat((vec_s16)vABCD, 5);
222
-    const vec_s16 vD = vec_splat((vec_s16)vABCD, 7);
223
-    LOAD_ZERO;
224
-    const vec_s16 v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4));
225
-    const vec_u16 v6us  = vec_splat_u16(6);
226
-    register int loadSecond     = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
227
-    register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
228
-
229
-    vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1;
230
-    vec_u8 vsrc0uc, vsrc1uc;
231
-    vec_s16 vsrc0ssH, vsrc1ssH;
232
-    vec_u8 vsrcCuc, vsrc2uc, vsrc3uc;
233
-    vec_s16 vsrc2ssH, vsrc3ssH, psum;
234
-    vec_u8 vdst, ppsum, vfdst, fsum;
235
-
236
-    if (((unsigned long)dst) % 16 == 0) {
237
-        fperm = (vec_u8){0x10, 0x11, 0x12, 0x13,
238
-                         0x14, 0x15, 0x16, 0x17,
239
-                         0x08, 0x09, 0x0A, 0x0B,
240
-                         0x0C, 0x0D, 0x0E, 0x0F};
241
-    } else {
242
-        fperm = (vec_u8){0x00, 0x01, 0x02, 0x03,
243
-                         0x04, 0x05, 0x06, 0x07,
244
-                         0x18, 0x19, 0x1A, 0x1B,
245
-                         0x1C, 0x1D, 0x1E, 0x1F};
246
-    }
247
-
248
-    vsrcAuc = vec_ld(0, src);
249
-
250
-    if (loadSecond)
251
-        vsrcBuc = vec_ld(16, src);
252
-    vsrcperm0 = vec_lvsl(0, src);
253
-    vsrcperm1 = vec_lvsl(1, src);
254
-
255
-    vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
256
-    if (reallyBadAlign)
257
-        vsrc1uc = vsrcBuc;
258
-    else
259
-        vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);
260
-
261
-    vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc0uc);
262
-    vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc1uc);
263
-
264
-    if (!loadSecond) {// -> !reallyBadAlign
265
-        for (i = 0 ; i < h ; i++) {
266
-
267
-
268
-            vsrcCuc = vec_ld(stride + 0, src);
269
-
270
-            vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
271
-            vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
272
-
273
-            CHROMA_MC8_ALTIVEC_CORE(vec_splat_s16(0), add28)
274
-        }
275
-    } else {
276
-        vec_u8 vsrcDuc;
277
-        for (i = 0 ; i < h ; i++) {
278
-            vsrcCuc = vec_ld(stride + 0, src);
279
-            vsrcDuc = vec_ld(stride + 16, src);
280
-
281
-            vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
282
-            if (reallyBadAlign)
283
-                vsrc3uc = vsrcDuc;
284
-            else
285
-                vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
286
-
287
-            CHROMA_MC8_ALTIVEC_CORE(vec_splat_s16(0), add28)
288
-        }
289
-    }
290
-}
291
-#endif
292
-
293
-#undef noop
294
-#undef add28
295
-#undef CHROMA_MC8_ALTIVEC_CORE
296
-
297 29
 /* this code assume stride % 16 == 0 */
298 30
 #ifdef PREFIX_h264_qpel16_h_lowpass_altivec
299 31
 static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {
300 32
new file mode 100644
... ...
@@ -0,0 +1,64 @@
0
+/*
1
+ * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
2
+ *
3
+ * This file is part of Libav.
4
+ *
5
+ * Libav is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * Libav is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with Libav; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#include "config.h"
21
+#include "libavutil/attributes.h"
22
+#include "libavcodec/h264chroma.h"
23
+
24
+#if HAVE_ALTIVEC
25
+#include "libavutil/cpu.h"
26
+#include "libavutil/intreadwrite.h"
27
+#include "libavutil/ppc/types_altivec.h"
28
+#include "libavutil/ppc/util_altivec.h"
29
+#include "dsputil_altivec.h"
30
+
31
+#define PUT_OP_U8_ALTIVEC(d, s, dst) d = s
32
+#define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)
33
+
34
+#define OP_U8_ALTIVEC                          PUT_OP_U8_ALTIVEC
35
+#define PREFIX_h264_chroma_mc8_altivec         put_h264_chroma_mc8_altivec
36
+#define PREFIX_h264_chroma_mc8_num             altivec_put_h264_chroma_mc8_num
37
+#include "h264chroma_template.c"
38
+#undef OP_U8_ALTIVEC
39
+#undef PREFIX_h264_chroma_mc8_altivec
40
+#undef PREFIX_h264_chroma_mc8_num
41
+
42
+#define OP_U8_ALTIVEC                          AVG_OP_U8_ALTIVEC
43
+#define PREFIX_h264_chroma_mc8_altivec         avg_h264_chroma_mc8_altivec
44
+#define PREFIX_h264_chroma_mc8_num             altivec_avg_h264_chroma_mc8_num
45
+#include "h264chroma_template.c"
46
+#undef OP_U8_ALTIVEC
47
+#undef PREFIX_h264_chroma_mc8_altivec
48
+#undef PREFIX_h264_chroma_mc8_num
49
+#endif /* HAVE_ALTIVEC */
50
+
51
+av_cold void ff_h264chroma_init_ppc(H264ChromaContext *c, int bit_depth)
52
+{
53
+#if HAVE_ALTIVEC
54
+    const int high_bit_depth = bit_depth > 8;
55
+
56
+    if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
57
+    if (!high_bit_depth) {
58
+        c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec;
59
+        c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec;
60
+    }
61
+    }
62
+#endif /* HAVE_ALTIVEC */
63
+}
0 64
new file mode 100644
... ...
@@ -0,0 +1,289 @@
0
+/*
1
+ * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
2
+ *
3
+ * This file is part of Libav.
4
+ *
5
+ * Libav is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * Libav is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with Libav; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#include "libavutil/mem.h"
21
+
22
+/* this code assume that stride % 16 == 0 */
23
+
24
+#define CHROMA_MC8_ALTIVEC_CORE(BIAS1, BIAS2) \
25
+        vsrc2ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc2uc);\
26
+        vsrc3ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc3uc);\
27
+\
28
+        psum = vec_mladd(vA, vsrc0ssH, BIAS1);\
29
+        psum = vec_mladd(vB, vsrc1ssH, psum);\
30
+        psum = vec_mladd(vC, vsrc2ssH, psum);\
31
+        psum = vec_mladd(vD, vsrc3ssH, psum);\
32
+        psum = BIAS2(psum);\
33
+        psum = vec_sr(psum, v6us);\
34
+\
35
+        vdst = vec_ld(0, dst);\
36
+        ppsum = (vec_u8)vec_pack(psum, psum);\
37
+        vfdst = vec_perm(vdst, ppsum, fperm);\
38
+\
39
+        OP_U8_ALTIVEC(fsum, vfdst, vdst);\
40
+\
41
+        vec_st(fsum, 0, dst);\
42
+\
43
+        vsrc0ssH = vsrc2ssH;\
44
+        vsrc1ssH = vsrc3ssH;\
45
+\
46
+        dst += stride;\
47
+        src += stride;
48
+
49
+#define CHROMA_MC8_ALTIVEC_CORE_SIMPLE \
50
+\
51
+        vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc);\
52
+        vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc);\
53
+\
54
+        psum = vec_mladd(vA, vsrc0ssH, v32ss);\
55
+        psum = vec_mladd(vE, vsrc1ssH, psum);\
56
+        psum = vec_sr(psum, v6us);\
57
+\
58
+        vdst = vec_ld(0, dst);\
59
+        ppsum = (vec_u8)vec_pack(psum, psum);\
60
+        vfdst = vec_perm(vdst, ppsum, fperm);\
61
+\
62
+        OP_U8_ALTIVEC(fsum, vfdst, vdst);\
63
+\
64
+        vec_st(fsum, 0, dst);\
65
+\
66
+        dst += stride;\
67
+        src += stride;
68
+
69
+#define noop(a) a
70
+#define add28(a) vec_add(v28ss, a)
71
+
72
+#ifdef PREFIX_h264_chroma_mc8_altivec
73
+static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,
74
+                                    int stride, int h, int x, int y) {
75
+    DECLARE_ALIGNED(16, signed int, ABCD)[4] =
76
+                        {((8 - x) * (8 - y)),
77
+                         ((    x) * (8 - y)),
78
+                         ((8 - x) * (    y)),
79
+                         ((    x) * (    y))};
80
+    register int i;
81
+    vec_u8 fperm;
82
+    const vec_s32 vABCD = vec_ld(0, ABCD);
83
+    const vec_s16 vA = vec_splat((vec_s16)vABCD, 1);
84
+    const vec_s16 vB = vec_splat((vec_s16)vABCD, 3);
85
+    const vec_s16 vC = vec_splat((vec_s16)vABCD, 5);
86
+    const vec_s16 vD = vec_splat((vec_s16)vABCD, 7);
87
+    LOAD_ZERO;
88
+    const vec_s16 v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5));
89
+    const vec_u16 v6us = vec_splat_u16(6);
90
+    register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
91
+    register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
92
+
93
+    vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1;
94
+    vec_u8 vsrc0uc, vsrc1uc;
95
+    vec_s16 vsrc0ssH, vsrc1ssH;
96
+    vec_u8 vsrcCuc, vsrc2uc, vsrc3uc;
97
+    vec_s16 vsrc2ssH, vsrc3ssH, psum;
98
+    vec_u8 vdst, ppsum, vfdst, fsum;
99
+
100
+    if (((unsigned long)dst) % 16 == 0) {
101
+        fperm = (vec_u8){0x10, 0x11, 0x12, 0x13,
102
+                         0x14, 0x15, 0x16, 0x17,
103
+                         0x08, 0x09, 0x0A, 0x0B,
104
+                         0x0C, 0x0D, 0x0E, 0x0F};
105
+    } else {
106
+        fperm = (vec_u8){0x00, 0x01, 0x02, 0x03,
107
+                         0x04, 0x05, 0x06, 0x07,
108
+                         0x18, 0x19, 0x1A, 0x1B,
109
+                         0x1C, 0x1D, 0x1E, 0x1F};
110
+    }
111
+
112
+    vsrcAuc = vec_ld(0, src);
113
+
114
+    if (loadSecond)
115
+        vsrcBuc = vec_ld(16, src);
116
+    vsrcperm0 = vec_lvsl(0, src);
117
+    vsrcperm1 = vec_lvsl(1, src);
118
+
119
+    vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
120
+    if (reallyBadAlign)
121
+        vsrc1uc = vsrcBuc;
122
+    else
123
+        vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);
124
+
125
+    vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc);
126
+    vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc);
127
+
128
+    if (ABCD[3]) {
129
+        if (!loadSecond) {// -> !reallyBadAlign
130
+            for (i = 0 ; i < h ; i++) {
131
+                vsrcCuc = vec_ld(stride + 0, src);
132
+                vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
133
+                vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
134
+
135
+                CHROMA_MC8_ALTIVEC_CORE(v32ss, noop)
136
+            }
137
+        } else {
138
+            vec_u8 vsrcDuc;
139
+            for (i = 0 ; i < h ; i++) {
140
+                vsrcCuc = vec_ld(stride + 0, src);
141
+                vsrcDuc = vec_ld(stride + 16, src);
142
+                vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
143
+                if (reallyBadAlign)
144
+                    vsrc3uc = vsrcDuc;
145
+                else
146
+                    vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
147
+
148
+                CHROMA_MC8_ALTIVEC_CORE(v32ss, noop)
149
+            }
150
+        }
151
+    } else {
152
+        const vec_s16 vE = vec_add(vB, vC);
153
+        if (ABCD[2]) { // x == 0 B == 0
154
+            if (!loadSecond) {// -> !reallyBadAlign
155
+                for (i = 0 ; i < h ; i++) {
156
+                    vsrcCuc = vec_ld(stride + 0, src);
157
+                    vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
158
+                    CHROMA_MC8_ALTIVEC_CORE_SIMPLE
159
+
160
+                    vsrc0uc = vsrc1uc;
161
+                }
162
+            } else {
163
+                vec_u8 vsrcDuc;
164
+                for (i = 0 ; i < h ; i++) {
165
+                    vsrcCuc = vec_ld(stride + 0, src);
166
+                    vsrcDuc = vec_ld(stride + 15, src);
167
+                    vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
168
+                    CHROMA_MC8_ALTIVEC_CORE_SIMPLE
169
+
170
+                    vsrc0uc = vsrc1uc;
171
+                }
172
+            }
173
+        } else { // y == 0 C == 0
174
+            if (!loadSecond) {// -> !reallyBadAlign
175
+                for (i = 0 ; i < h ; i++) {
176
+                    vsrcCuc = vec_ld(0, src);
177
+                    vsrc0uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
178
+                    vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
179
+
180
+                    CHROMA_MC8_ALTIVEC_CORE_SIMPLE
181
+                }
182
+            } else {
183
+                vec_u8 vsrcDuc;
184
+                for (i = 0 ; i < h ; i++) {
185
+                    vsrcCuc = vec_ld(0, src);
186
+                    vsrcDuc = vec_ld(15, src);
187
+                    vsrc0uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
188
+                    if (reallyBadAlign)
189
+                        vsrc1uc = vsrcDuc;
190
+                    else
191
+                        vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
192
+
193
+                    CHROMA_MC8_ALTIVEC_CORE_SIMPLE
194
+                }
195
+            }
196
+        }
197
+    }
198
+}
199
+#endif
200
+
201
+/* this code assume that stride % 16 == 0 */
202
+#ifdef PREFIX_no_rnd_vc1_chroma_mc8_altivec
203
+static void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
204
+   DECLARE_ALIGNED(16, signed int, ABCD)[4] =
205
+                        {((8 - x) * (8 - y)),
206
+                         ((    x) * (8 - y)),
207
+                         ((8 - x) * (    y)),
208
+                         ((    x) * (    y))};
209
+    register int i;
210
+    vec_u8 fperm;
211
+    const vec_s32 vABCD = vec_ld(0, ABCD);
212
+    const vec_s16 vA = vec_splat((vec_s16)vABCD, 1);
213
+    const vec_s16 vB = vec_splat((vec_s16)vABCD, 3);
214
+    const vec_s16 vC = vec_splat((vec_s16)vABCD, 5);
215
+    const vec_s16 vD = vec_splat((vec_s16)vABCD, 7);
216
+    LOAD_ZERO;
217
+    const vec_s16 v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4));
218
+    const vec_u16 v6us  = vec_splat_u16(6);
219
+    register int loadSecond     = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
220
+    register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
221
+
222
+    vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1;
223
+    vec_u8 vsrc0uc, vsrc1uc;
224
+    vec_s16 vsrc0ssH, vsrc1ssH;
225
+    vec_u8 vsrcCuc, vsrc2uc, vsrc3uc;
226
+    vec_s16 vsrc2ssH, vsrc3ssH, psum;
227
+    vec_u8 vdst, ppsum, vfdst, fsum;
228
+
229
+    if (((unsigned long)dst) % 16 == 0) {
230
+        fperm = (vec_u8){0x10, 0x11, 0x12, 0x13,
231
+                         0x14, 0x15, 0x16, 0x17,
232
+                         0x08, 0x09, 0x0A, 0x0B,
233
+                         0x0C, 0x0D, 0x0E, 0x0F};
234
+    } else {
235
+        fperm = (vec_u8){0x00, 0x01, 0x02, 0x03,
236
+                         0x04, 0x05, 0x06, 0x07,
237
+                         0x18, 0x19, 0x1A, 0x1B,
238
+                         0x1C, 0x1D, 0x1E, 0x1F};
239
+    }
240
+
241
+    vsrcAuc = vec_ld(0, src);
242
+
243
+    if (loadSecond)
244
+        vsrcBuc = vec_ld(16, src);
245
+    vsrcperm0 = vec_lvsl(0, src);
246
+    vsrcperm1 = vec_lvsl(1, src);
247
+
248
+    vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
249
+    if (reallyBadAlign)
250
+        vsrc1uc = vsrcBuc;
251
+    else
252
+        vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);
253
+
254
+    vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc0uc);
255
+    vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc1uc);
256
+
257
+    if (!loadSecond) {// -> !reallyBadAlign
258
+        for (i = 0 ; i < h ; i++) {
259
+
260
+
261
+            vsrcCuc = vec_ld(stride + 0, src);
262
+
263
+            vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
264
+            vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
265
+
266
+            CHROMA_MC8_ALTIVEC_CORE(vec_splat_s16(0), add28)
267
+        }
268
+    } else {
269
+        vec_u8 vsrcDuc;
270
+        for (i = 0 ; i < h ; i++) {
271
+            vsrcCuc = vec_ld(stride + 0, src);
272
+            vsrcDuc = vec_ld(stride + 16, src);
273
+
274
+            vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
275
+            if (reallyBadAlign)
276
+                vsrc3uc = vsrcDuc;
277
+            else
278
+                vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
279
+
280
+            CHROMA_MC8_ALTIVEC_CORE(vec_splat_s16(0), add28)
281
+        }
282
+    }
283
+}
284
+#endif
285
+
286
+#undef noop
287
+#undef add28
288
+#undef CHROMA_MC8_ALTIVEC_CORE
... ...
@@ -326,13 +326,13 @@ static void vc1_inv_trans_8x4_altivec(uint8_t *dest, int stride, int16_t *block)
326 326
 
327 327
 #define OP_U8_ALTIVEC                          PUT_OP_U8_ALTIVEC
328 328
 #define PREFIX_no_rnd_vc1_chroma_mc8_altivec   put_no_rnd_vc1_chroma_mc8_altivec
329
-#include "h264_qpel_template.c"
329
+#include "h264chroma_template.c"
330 330
 #undef OP_U8_ALTIVEC
331 331
 #undef PREFIX_no_rnd_vc1_chroma_mc8_altivec
332 332
 
333 333
 #define OP_U8_ALTIVEC                          AVG_OP_U8_ALTIVEC
334 334
 #define PREFIX_no_rnd_vc1_chroma_mc8_altivec   avg_no_rnd_vc1_chroma_mc8_altivec
335
-#include "h264_qpel_template.c"
335
+#include "h264chroma_template.c"
336 336
 #undef OP_U8_ALTIVEC
337 337
 #undef PREFIX_no_rnd_vc1_chroma_mc8_altivec
338 338
 
... ...
@@ -26,6 +26,7 @@
26 26
 
27 27
 #include "avcodec.h"
28 28
 #include "dsputil.h"
29
+#include "h264chroma.h"
29 30
 #include "h264qpel.h"
30 31
 #include "rv34dsp.h"
31 32
 
... ...
@@ -254,9 +255,11 @@ RV30_MC(avg_, 8)
254 254
 RV30_MC(avg_, 16)
255 255
 
256 256
 av_cold void ff_rv30dsp_init(RV34DSPContext *c, DSPContext* dsp) {
257
+    H264ChromaContext h264chroma;
257 258
     H264QpelContext qpel;
258 259
 
259 260
     ff_rv34dsp_init(c, dsp);
261
+    ff_h264chroma_init(&h264chroma, 8);
260 262
     ff_h264qpel_init(&qpel, 8);
261 263
 
262 264
     c->put_pixels_tab[0][ 0] = qpel.put_h264_qpel_pixels_tab[0][0];
... ...
@@ -296,8 +299,8 @@ av_cold void ff_rv30dsp_init(RV34DSPContext *c, DSPContext* dsp) {
296 296
     c->avg_pixels_tab[1][ 9] = avg_rv30_tpel8_mc12_c;
297 297
     c->avg_pixels_tab[1][10] = avg_rv30_tpel8_mc22_c;
298 298
 
299
-    c->put_chroma_pixels_tab[0] = dsp->put_h264_chroma_pixels_tab[0];
300
-    c->put_chroma_pixels_tab[1] = dsp->put_h264_chroma_pixels_tab[1];
301
-    c->avg_chroma_pixels_tab[0] = dsp->avg_h264_chroma_pixels_tab[0];
302
-    c->avg_chroma_pixels_tab[1] = dsp->avg_h264_chroma_pixels_tab[1];
299
+    c->put_chroma_pixels_tab[0] = h264chroma.put_h264_chroma_pixels_tab[0];
300
+    c->put_chroma_pixels_tab[1] = h264chroma.put_h264_chroma_pixels_tab[1];
301
+    c->avg_chroma_pixels_tab[0] = h264chroma.avg_h264_chroma_pixels_tab[0];
302
+    c->avg_chroma_pixels_tab[1] = h264chroma.avg_h264_chroma_pixels_tab[1];
303 303
 }
... ...
@@ -28,6 +28,7 @@
28 28
 #define AVCODEC_RV34DSP_H
29 29
 
30 30
 #include "dsputil.h"
31
+#include "h264chroma.h"
31 32
 
32 33
 typedef void (*rv40_weight_func)(uint8_t *dst/*align width (8 or 16)*/,
33 34
                                  uint8_t *src1/*align width (8 or 16)*/,
... ...
@@ -1,3 +1,5 @@
1 1
 OBJS += sh4/dsputil_align.o                                             \
2 2
         sh4/dsputil_sh4.o                                               \
3 3
         sh4/idct_sh4.o                                                  \
4
+
5
+OBJS-$(CONFIG_H264CHROMA)               += sh4/h264chroma_init.o        \
... ...
@@ -369,14 +369,6 @@ av_cold void ff_dsputil_init_align(DSPContext *c, AVCodecContext *avctx)
369 369
     /* dspfunc(avg_no_rnd_qpel, 1, 8); */
370 370
 
371 371
 #undef dspfunc
372
-    if (!high_bit_depth) {
373
-    c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_sh4;
374
-    c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_sh4;
375
-    c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_sh4;
376
-    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_sh4;
377
-    c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_sh4;
378
-    c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_sh4;
379
-    }
380 372
 
381 373
     c->put_mspel_pixels_tab[0]= put_mspel8_mc00_sh4;
382 374
     c->put_mspel_pixels_tab[1]= put_mspel8_mc10_sh4;
383 375
new file mode 100644
... ...
@@ -0,0 +1,132 @@
0
+/*
1
+ * aligned/packed access motion
2
+ *
3
+ * Copyright (c) 2001-2003 BERO <bero@geocities.co.jp>
4
+ *
5
+ * This file is part of Libav.
6
+ *
7
+ * Libav is free software; you can redistribute it and/or
8
+ * modify it under the terms of the GNU Lesser General Public
9
+ * License as published by the Free Software Foundation; either
10
+ * version 2.1 of the License, or (at your option) any later version.
11
+ *
12
+ * Libav is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
+ * Lesser General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU Lesser General Public
18
+ * License along with Libav; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ */
21
+
22
+#include <assert.h>
23
+#include <stdint.h>
24
+
25
+#include "libavutil/attributes.h"
26
+#include "libavcodec/h264chroma.h"
27
+
28
+#define H264_CHROMA_MC(OPNAME, OP)\
29
+static void OPNAME ## h264_chroma_mc2_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
30
+    const int A=(8-x)*(8-y);\
31
+    const int B=(  x)*(8-y);\
32
+    const int C=(8-x)*(  y);\
33
+    const int D=(  x)*(  y);\
34
+    \
35
+    assert(x<8 && y<8 && x>=0 && y>=0);\
36
+\
37
+    do {\
38
+        int t0,t1,t2,t3; \
39
+        uint8_t *s0 = src; \
40
+        uint8_t *s1 = src+stride; \
41
+        t0 = *s0++; t2 = *s1++; \
42
+        t1 = *s0++; t3 = *s1++; \
43
+        OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\
44
+        t0 = *s0++; t2 = *s1++; \
45
+        OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\
46
+        dst+= stride;\
47
+        src+= stride;\
48
+    }while(--h);\
49
+}\
50
+\
51
+static void OPNAME ## h264_chroma_mc4_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
52
+    const int A=(8-x)*(8-y);\
53
+    const int B=(  x)*(8-y);\
54
+    const int C=(8-x)*(  y);\
55
+    const int D=(  x)*(  y);\
56
+    \
57
+    assert(x<8 && y<8 && x>=0 && y>=0);\
58
+\
59
+    do {\
60
+        int t0,t1,t2,t3; \
61
+        uint8_t *s0 = src; \
62
+        uint8_t *s1 = src+stride; \
63
+        t0 = *s0++; t2 = *s1++; \
64
+        t1 = *s0++; t3 = *s1++; \
65
+        OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\
66
+        t0 = *s0++; t2 = *s1++; \
67
+        OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\
68
+        t1 = *s0++; t3 = *s1++; \
69
+        OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\
70
+        t0 = *s0++; t2 = *s1++; \
71
+        OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\
72
+        dst+= stride;\
73
+        src+= stride;\
74
+    }while(--h);\
75
+}\
76
+\
77
+static void OPNAME ## h264_chroma_mc8_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
78
+    const int A=(8-x)*(8-y);\
79
+    const int B=(  x)*(8-y);\
80
+    const int C=(8-x)*(  y);\
81
+    const int D=(  x)*(  y);\
82
+    \
83
+    assert(x<8 && y<8 && x>=0 && y>=0);\
84
+\
85
+    do {\
86
+        int t0,t1,t2,t3; \
87
+        uint8_t *s0 = src; \
88
+        uint8_t *s1 = src+stride; \
89
+        t0 = *s0++; t2 = *s1++; \
90
+        t1 = *s0++; t3 = *s1++; \
91
+        OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\
92
+        t0 = *s0++; t2 = *s1++; \
93
+        OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\
94
+        t1 = *s0++; t3 = *s1++; \
95
+        OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\
96
+        t0 = *s0++; t2 = *s1++; \
97
+        OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\
98
+        t1 = *s0++; t3 = *s1++; \
99
+        OP(dst[4], (A*t0 + B*t1 + C*t2 + D*t3));\
100
+        t0 = *s0++; t2 = *s1++; \
101
+        OP(dst[5], (A*t1 + B*t0 + C*t3 + D*t2));\
102
+        t1 = *s0++; t3 = *s1++; \
103
+        OP(dst[6], (A*t0 + B*t1 + C*t2 + D*t3));\
104
+        t0 = *s0++; t2 = *s1++; \
105
+        OP(dst[7], (A*t1 + B*t0 + C*t3 + D*t2));\
106
+        dst+= stride;\
107
+        src+= stride;\
108
+    }while(--h);\
109
+}
110
+
111
+#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
112
+#define op_put(a, b) a = (((b) + 32)>>6)
113
+
114
+H264_CHROMA_MC(put_       , op_put)
115
+H264_CHROMA_MC(avg_       , op_avg)
116
+#undef op_avg
117
+#undef op_put
118
+
119
+av_cold void ff_h264chroma_init_sh4(H264ChromaContext *c, int bit_depth)
120
+{
121
+    const int high_bit_depth = bit_depth > 8;
122
+
123
+    if (!high_bit_depth) {
124
+    c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_sh4;
125
+    c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_sh4;
126
+    c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_sh4;
127
+    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_sh4;
128
+    c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_sh4;
129
+    c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_sh4;
130
+    }
131
+}
... ...
@@ -359,97 +359,6 @@ static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y
359 359
     }while(--h);
360 360
 }
361 361
 
362
-#define H264_CHROMA_MC(OPNAME, OP)\
363
-static void OPNAME ## h264_chroma_mc2_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
364
-    const int A=(8-x)*(8-y);\
365
-    const int B=(  x)*(8-y);\
366
-    const int C=(8-x)*(  y);\
367
-    const int D=(  x)*(  y);\
368
-    \
369
-    assert(x<8 && y<8 && x>=0 && y>=0);\
370
-\
371
-    do {\
372
-        int t0,t1,t2,t3; \
373
-        uint8_t *s0 = src; \
374
-        uint8_t *s1 = src+stride; \
375
-        t0 = *s0++; t2 = *s1++; \
376
-        t1 = *s0++; t3 = *s1++; \
377
-        OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\
378
-        t0 = *s0++; t2 = *s1++; \
379
-        OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\
380
-        dst+= stride;\
381
-        src+= stride;\
382
-    }while(--h);\
383
-}\
384
-\
385
-static void OPNAME ## h264_chroma_mc4_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
386
-    const int A=(8-x)*(8-y);\
387
-    const int B=(  x)*(8-y);\
388
-    const int C=(8-x)*(  y);\
389
-    const int D=(  x)*(  y);\
390
-    \
391
-    assert(x<8 && y<8 && x>=0 && y>=0);\
392
-\
393
-    do {\
394
-        int t0,t1,t2,t3; \
395
-        uint8_t *s0 = src; \
396
-        uint8_t *s1 = src+stride; \
397
-        t0 = *s0++; t2 = *s1++; \
398
-        t1 = *s0++; t3 = *s1++; \
399
-        OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\
400
-        t0 = *s0++; t2 = *s1++; \
401
-        OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\
402
-        t1 = *s0++; t3 = *s1++; \
403
-        OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\
404
-        t0 = *s0++; t2 = *s1++; \
405
-        OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\
406
-        dst+= stride;\
407
-        src+= stride;\
408
-    }while(--h);\
409
-}\
410
-\
411
-static void OPNAME ## h264_chroma_mc8_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
412
-    const int A=(8-x)*(8-y);\
413
-    const int B=(  x)*(8-y);\
414
-    const int C=(8-x)*(  y);\
415
-    const int D=(  x)*(  y);\
416
-    \
417
-    assert(x<8 && y<8 && x>=0 && y>=0);\
418
-\
419
-    do {\
420
-        int t0,t1,t2,t3; \
421
-        uint8_t *s0 = src; \
422
-        uint8_t *s1 = src+stride; \
423
-        t0 = *s0++; t2 = *s1++; \
424
-        t1 = *s0++; t3 = *s1++; \
425
-        OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\
426
-        t0 = *s0++; t2 = *s1++; \
427
-        OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\
428
-        t1 = *s0++; t3 = *s1++; \
429
-        OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\
430
-        t0 = *s0++; t2 = *s1++; \
431
-        OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\
432
-        t1 = *s0++; t3 = *s1++; \
433
-        OP(dst[4], (A*t0 + B*t1 + C*t2 + D*t3));\
434
-        t0 = *s0++; t2 = *s1++; \
435
-        OP(dst[5], (A*t1 + B*t0 + C*t3 + D*t2));\
436
-        t1 = *s0++; t3 = *s1++; \
437
-        OP(dst[6], (A*t0 + B*t1 + C*t2 + D*t3));\
438
-        t0 = *s0++; t2 = *s1++; \
439
-        OP(dst[7], (A*t1 + B*t0 + C*t3 + D*t2));\
440
-        dst+= stride;\
441
-        src+= stride;\
442
-    }while(--h);\
443
-}
444
-
445
-#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
446
-#define op_put(a, b) a = (((b) + 32)>>6)
447
-
448
-H264_CHROMA_MC(put_       , op_put)
449
-H264_CHROMA_MC(avg_       , op_avg)
450
-#undef op_avg
451
-#undef op_put
452
-
453 362
 #define QPEL_MC(r, OPNAME, RND, OP) \
454 363
 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
455 364
     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
... ...
@@ -24,6 +24,7 @@
24 24
 #define AVCODEC_VC1_H
25 25
 
26 26
 #include "avcodec.h"
27
+#include "h264chroma.h"
27 28
 #include "mpegvideo.h"
28 29
 #include "intrax8.h"
29 30
 #include "vc1dsp.h"
... ...
@@ -181,6 +182,7 @@ enum FrameCodingMode {
181 181
 typedef struct VC1Context{
182 182
     MpegEncContext s;
183 183
     IntraX8Context x8;
184
+    H264ChromaContext h264chroma;
184 185
     VC1DSPContext vc1dsp;
185 186
 
186 187
     int bits;
... ...
@@ -31,6 +31,7 @@
31 31
 #include "avcodec.h"
32 32
 #include "mpegvideo.h"
33 33
 #include "h263.h"
34
+#include "h264chroma.h"
34 35
 #include "vc1.h"
35 36
 #include "vc1data.h"
36 37
 #include "vc1acdata.h"
... ...
@@ -331,6 +332,7 @@ static void vc1_mc_1mv(VC1Context *v, int dir)
331 331
 {
332 332
     MpegEncContext *s = &v->s;
333 333
     DSPContext *dsp   = &v->s.dsp;
334
+    H264ChromaContext *h264chroma = &v->h264chroma;
334 335
     uint8_t *srcY, *srcU, *srcV;
335 336
     int dxy, mx, my, uvmx, uvmy, src_x, src_y, uvsrc_x, uvsrc_y;
336 337
     int off, off_uv;
... ...
@@ -519,8 +521,8 @@ static void vc1_mc_1mv(VC1Context *v, int dir)
519 519
     uvmx = (uvmx & 3) << 1;
520 520
     uvmy = (uvmy & 3) << 1;
521 521
     if (!v->rnd) {
522
-        dsp->put_h264_chroma_pixels_tab[0](s->dest[1] + off_uv, srcU, s->uvlinesize, 8, uvmx, uvmy);
523
-        dsp->put_h264_chroma_pixels_tab[0](s->dest[2] + off_uv, srcV, s->uvlinesize, 8, uvmx, uvmy);
522
+        h264chroma->put_h264_chroma_pixels_tab[0](s->dest[1] + off_uv, srcU, s->uvlinesize, 8, uvmx, uvmy);
523
+        h264chroma->put_h264_chroma_pixels_tab[0](s->dest[2] + off_uv, srcV, s->uvlinesize, 8, uvmx, uvmy);
524 524
     } else {
525 525
         v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1] + off_uv, srcU, s->uvlinesize, 8, uvmx, uvmy);
526 526
         v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2] + off_uv, srcV, s->uvlinesize, 8, uvmx, uvmy);
... ...
@@ -769,7 +771,7 @@ static av_always_inline int get_chroma_mv(int *mvx, int *mvy, int *a, int flag,
769 769
 static void vc1_mc_4mv_chroma(VC1Context *v, int dir)
770 770
 {
771 771
     MpegEncContext *s = &v->s;
772
-    DSPContext *dsp   = &v->s.dsp;
772
+    H264ChromaContext *h264chroma = &v->h264chroma;
773 773
     uint8_t *srcU, *srcV;
774 774
     int uvmx, uvmy, uvsrc_x, uvsrc_y;
775 775
     int k, tx = 0, ty = 0;
... ...
@@ -915,8 +917,8 @@ static void vc1_mc_4mv_chroma(VC1Context *v, int dir)
915 915
     uvmx = (uvmx & 3) << 1;
916 916
     uvmy = (uvmy & 3) << 1;
917 917
     if (!v->rnd) {
918
-        dsp->put_h264_chroma_pixels_tab[0](s->dest[1] + off, srcU, s->uvlinesize, 8, uvmx, uvmy);
919
-        dsp->put_h264_chroma_pixels_tab[0](s->dest[2] + off, srcV, s->uvlinesize, 8, uvmx, uvmy);
918
+        h264chroma->put_h264_chroma_pixels_tab[0](s->dest[1] + off, srcU, s->uvlinesize, 8, uvmx, uvmy);
919
+        h264chroma->put_h264_chroma_pixels_tab[0](s->dest[2] + off, srcV, s->uvlinesize, 8, uvmx, uvmy);
920 920
     } else {
921 921
         v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1] + off, srcU, s->uvlinesize, 8, uvmx, uvmy);
922 922
         v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2] + off, srcV, s->uvlinesize, 8, uvmx, uvmy);
... ...
@@ -928,7 +930,7 @@ static void vc1_mc_4mv_chroma(VC1Context *v, int dir)
928 928
 static void vc1_mc_4mv_chroma4(VC1Context *v)
929 929
 {
930 930
     MpegEncContext *s = &v->s;
931
-    DSPContext *dsp = &v->s.dsp;
931
+    H264ChromaContext *h264chroma = &v->h264chroma;
932 932
     uint8_t *srcU, *srcV;
933 933
     int uvsrc_x, uvsrc_y;
934 934
     int uvmx_field[4], uvmy_field[4];
... ...
@@ -1000,8 +1002,8 @@ static void vc1_mc_4mv_chroma4(VC1Context *v)
1000 1000
             }
1001 1001
         }
1002 1002
         if (!v->rnd) {
1003
-            dsp->put_h264_chroma_pixels_tab[1](s->dest[1] + off, srcU, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
1004
-            dsp->put_h264_chroma_pixels_tab[1](s->dest[2] + off, srcV, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
1003
+            h264chroma->put_h264_chroma_pixels_tab[1](s->dest[1] + off, srcU, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
1004
+            h264chroma->put_h264_chroma_pixels_tab[1](s->dest[2] + off, srcV, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
1005 1005
         } else {
1006 1006
             v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[1](s->dest[1] + off, srcU, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
1007 1007
             v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[1](s->dest[2] + off, srcV, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
... ...
@@ -1828,6 +1830,7 @@ static void vc1_interp_mc(VC1Context *v)
1828 1828
 {
1829 1829
     MpegEncContext *s = &v->s;
1830 1830
     DSPContext *dsp = &v->s.dsp;
1831
+    H264ChromaContext *h264chroma = &v->h264chroma;
1831 1832
     uint8_t *srcY, *srcU, *srcV;
1832 1833
     int dxy, mx, my, uvmx, uvmy, src_x, src_y, uvsrc_x, uvsrc_y;
1833 1834
     int off, off_uv;
... ...
@@ -1957,8 +1960,8 @@ static void vc1_interp_mc(VC1Context *v)
1957 1957
     uvmx = (uvmx & 3) << 1;
1958 1958
     uvmy = (uvmy & 3) << 1;
1959 1959
     if (!v->rnd) {
1960
-        dsp->avg_h264_chroma_pixels_tab[0](s->dest[1] + off_uv, srcU, s->uvlinesize, 8, uvmx, uvmy);
1961
-        dsp->avg_h264_chroma_pixels_tab[0](s->dest[2] + off_uv, srcV, s->uvlinesize, 8, uvmx, uvmy);
1960
+        h264chroma->avg_h264_chroma_pixels_tab[0](s->dest[1] + off_uv, srcU, s->uvlinesize, 8, uvmx, uvmy);
1961
+        h264chroma->avg_h264_chroma_pixels_tab[0](s->dest[2] + off_uv, srcV, s->uvlinesize, 8, uvmx, uvmy);
1962 1962
     } else {
1963 1963
         v->vc1dsp.avg_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1] + off_uv, srcU, s->uvlinesize, 8, uvmx, uvmy);
1964 1964
         v->vc1dsp.avg_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2] + off_uv, srcV, s->uvlinesize, 8, uvmx, uvmy);
... ...
@@ -5164,6 +5167,7 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
5164 5164
 
5165 5165
     if (ff_vc1_init_common(v) < 0)
5166 5166
         return -1;
5167
+    ff_h264chroma_init(&v->h264chroma, 8);
5167 5168
     ff_vc1dsp_init(&v->vc1dsp);
5168 5169
 
5169 5170
     if (avctx->codec_id == AV_CODEC_ID_WMV3 || avctx->codec_id == AV_CODEC_ID_WMV3IMAGE) {
... ...
@@ -25,8 +25,9 @@
25 25
  *
26 26
  */
27 27
 
28
-#include "vc1dsp.h"
29 28
 #include "libavutil/common.h"
29
+#include "h264chroma.h"
30
+#include "vc1dsp.h"
30 31
 
31 32
 
32 33
 /** Apply overlap transform to horizontal edge
... ...
@@ -29,6 +29,7 @@
29 29
 #define AVCODEC_VC1DSP_H
30 30
 
31 31
 #include "dsputil.h"
32
+#include "h264chroma.h"
32 33
 
33 34
 typedef struct VC1DSPContext {
34 35
     /* vc1 functions */
... ...
@@ -26,7 +26,7 @@
26 26
 #include "avcodec.h"
27 27
 #include "bytestream.h"
28 28
 #include "internal.h"
29
-
29
+#include "h264chroma.h"
30 30
 #include "vp56.h"
31 31
 #include "vp56data.h"
32 32
 
... ...
@@ -674,6 +674,7 @@ av_cold void ff_vp56_init(AVCodecContext *avctx, int flip, int has_alpha)
674 674
     avctx->pix_fmt = has_alpha ? AV_PIX_FMT_YUVA420P : AV_PIX_FMT_YUV420P;
675 675
 
676 676
     ff_dsputil_init(&s->dsp, avctx);
677
+    ff_h264chroma_init(&s->h264chroma, 8);
677 678
     ff_videodsp_init(&s->vdsp, 8);
678 679
     ff_vp3dsp_init(&s->vp3dsp, avctx->flags);
679 680
     ff_vp56dsp_init(&s->vp56dsp, avctx->codec->id);
... ...
@@ -30,6 +30,7 @@
30 30
 #include "dsputil.h"
31 31
 #include "get_bits.h"
32 32
 #include "bytestream.h"
33
+#include "h264chroma.h"
33 34
 #include "videodsp.h"
34 35
 #include "vp3dsp.h"
35 36
 #include "vp56dsp.h"
... ...
@@ -95,6 +96,7 @@ typedef struct VP56Model {
95 95
 struct vp56_context {
96 96
     AVCodecContext *avctx;
97 97
     DSPContext dsp;
98
+    H264ChromaContext h264chroma;
98 99
     VideoDSPContext vdsp;
99 100
     VP3DSPContext vp3dsp;
100 101
     VP56DSPContext vp56dsp;
... ...
@@ -536,8 +536,8 @@ static void vp6_filter_diag2(VP56Context *s, uint8_t *dst, uint8_t *src,
536 536
                              int stride, int h_weight, int v_weight)
537 537
 {
538 538
     uint8_t *tmp = s->edge_emu_buffer+16;
539
-    s->dsp.put_h264_chroma_pixels_tab[0](tmp, src, stride, 9, h_weight, 0);
540
-    s->dsp.put_h264_chroma_pixels_tab[0](dst, tmp, stride, 8, 0, v_weight);
539
+    s->h264chroma.put_h264_chroma_pixels_tab[0](tmp, src, stride, 9, h_weight, 0);
540
+    s->h264chroma.put_h264_chroma_pixels_tab[0](dst, tmp, stride, 8, 0, v_weight);
541 541
 }
542 542
 
543 543
 static void vp6_filter(VP56Context *s, uint8_t *dst, uint8_t *src,
... ...
@@ -583,7 +583,7 @@ static void vp6_filter(VP56Context *s, uint8_t *dst, uint8_t *src,
583 583
         }
584 584
     } else {
585 585
         if (!x8 || !y8) {
586
-            s->dsp.put_h264_chroma_pixels_tab[0](dst, src+offset1, stride, 8, x8, y8);
586
+            s->h264chroma.put_h264_chroma_pixels_tab[0](dst, src + offset1, stride, 8, x8, y8);
587 587
         } else {
588 588
             vp6_filter_diag2(s, dst, src+offset1 + ((mv.x^mv.y)>>31), stride, x8, y8);
589 589
         }
... ...
@@ -43,7 +43,8 @@ YASM-OBJS-$(CONFIG_AC3DSP)             += x86/ac3dsp.o
43 43
 YASM-OBJS-$(CONFIG_DCT)                += x86/dct32.o
44 44
 YASM-OBJS-$(CONFIG_ENCODERS)           += x86/dsputilenc.o
45 45
 YASM-OBJS-$(CONFIG_FFT)                += x86/fft.o
46
-YASM-OBJS-$(CONFIG_H264CHROMA)         += x86/h264_chromamc.o           \
46
+YASM-OBJS-$(CONFIG_H264CHROMA)         += x86/h264chroma_init.o         \
47
+                                          x86/h264_chromamc.o           \
47 48
                                           x86/h264_chromamc_10bit.o
48 49
 YASM-OBJS-$(CONFIG_H264DSP)            += x86/h264_deblock.o            \
49 50
                                           x86/h264_deblock_10bit.o      \
... ...
@@ -1460,49 +1460,6 @@ void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
1460 1460
 void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
1461 1461
                           ptrdiff_t line_size, int h);
1462 1462
 
1463
-void ff_put_h264_chroma_mc8_rnd_mmx  (uint8_t *dst, uint8_t *src,
1464
-                                      int stride, int h, int x, int y);
1465
-void ff_avg_h264_chroma_mc8_rnd_mmxext(uint8_t *dst, uint8_t *src,
1466
-                                       int stride, int h, int x, int y);
1467
-void ff_avg_h264_chroma_mc8_rnd_3dnow(uint8_t *dst, uint8_t *src,
1468
-                                      int stride, int h, int x, int y);
1469
-
1470
-void ff_put_h264_chroma_mc4_mmx      (uint8_t *dst, uint8_t *src,
1471
-                                      int stride, int h, int x, int y);
1472
-void ff_avg_h264_chroma_mc4_mmxext   (uint8_t *dst, uint8_t *src,
1473
-                                      int stride, int h, int x, int y);
1474
-void ff_avg_h264_chroma_mc4_3dnow    (uint8_t *dst, uint8_t *src,
1475
-                                      int stride, int h, int x, int y);
1476
-
1477
-void ff_put_h264_chroma_mc2_mmxext   (uint8_t *dst, uint8_t *src,
1478
-                                      int stride, int h, int x, int y);
1479
-void ff_avg_h264_chroma_mc2_mmxext   (uint8_t *dst, uint8_t *src,
1480
-                                      int stride, int h, int x, int y);
1481
-
1482
-void ff_put_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src,
1483
-                                      int stride, int h, int x, int y);
1484
-void ff_put_h264_chroma_mc4_ssse3    (uint8_t *dst, uint8_t *src,
1485
-                                      int stride, int h, int x, int y);
1486
-
1487
-void ff_avg_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src,
1488
-                                      int stride, int h, int x, int y);
1489
-void ff_avg_h264_chroma_mc4_ssse3    (uint8_t *dst, uint8_t *src,
1490
-                                      int stride, int h, int x, int y);
1491
-
1492
-#define CHROMA_MC(OP, NUM, DEPTH, OPT)                                  \
1493
-void ff_ ## OP ## _h264_chroma_mc ## NUM ## _ ## DEPTH ## _ ## OPT      \
1494
-                                      (uint8_t *dst, uint8_t *src,      \
1495
-                                       int stride, int h, int x, int y);
1496
-
1497
-CHROMA_MC(put, 2, 10, mmxext)
1498
-CHROMA_MC(avg, 2, 10, mmxext)
1499
-CHROMA_MC(put, 4, 10, mmxext)
1500
-CHROMA_MC(avg, 4, 10, mmxext)
1501
-CHROMA_MC(put, 8, 10, sse2)
1502
-CHROMA_MC(avg, 8, 10, sse2)
1503
-CHROMA_MC(put, 8, 10, avx)
1504
-CHROMA_MC(avg, 8, 10, avx)
1505
-
1506 1463
 #if HAVE_INLINE_ASM
1507 1464
 
1508 1465
 /* CAVS-specific */
... ...
@@ -1704,11 +1661,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
1704 1704
 #endif /* HAVE_INLINE_ASM */
1705 1705
 
1706 1706
 #if HAVE_YASM
1707
-    if (!high_bit_depth && CONFIG_H264CHROMA) {
1708
-        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_mmx;
1709
-        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmx;
1710
-    }
1711
-
1712 1707
     c->vector_clip_int32 = ff_vector_clip_int32_mmx;
1713 1708
 #endif
1714 1709
 
... ...
@@ -1773,19 +1725,6 @@ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
1773 1773
         c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
1774 1774
     }
1775 1775
 
1776
-    if (!high_bit_depth && CONFIG_H264CHROMA) {
1777
-        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_mmxext;
1778
-        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmxext;
1779
-        c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_mmxext;
1780
-        c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_mmxext;
1781
-    }
1782
-    if (bit_depth == 10 && CONFIG_H264CHROMA) {
1783
-        c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmxext;
1784
-        c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmxext;
1785
-        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmxext;
1786
-        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmxext;
1787
-    }
1788
-
1789 1776
     /* slower than cmov version on AMD */
1790 1777
     if (!(mm_flags & AV_CPU_FLAG_3DNOW))
1791 1778
         c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmxext;
... ...
@@ -1838,11 +1777,6 @@ static av_cold void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
1838 1838
         c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow;
1839 1839
         c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow;
1840 1840
     }
1841
-
1842
-    if (!high_bit_depth && CONFIG_H264CHROMA) {
1843
-        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_3dnow;
1844
-        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_3dnow;
1845
-    }
1846 1841
 #endif /* HAVE_YASM */
1847 1842
 }
1848 1843
 
... ...
@@ -1889,13 +1823,6 @@ static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
1889 1889
         }
1890 1890
     }
1891 1891
 
1892
-    if (bit_depth == 10) {
1893
-        if (CONFIG_H264CHROMA) {
1894
-            c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2;
1895
-            c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2;
1896
-        }
1897
-    }
1898
-
1899 1892
     c->scalarproduct_int16          = ff_scalarproduct_int16_sse2;
1900 1893
     c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2;
1901 1894
     if (mm_flags & AV_CPU_FLAG_ATOM) {
... ...
@@ -1916,14 +1843,6 @@ static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx,
1916 1916
                                        int mm_flags)
1917 1917
 {
1918 1918
 #if HAVE_SSSE3_EXTERNAL
1919
-    const int high_bit_depth = avctx->bits_per_raw_sample > 8;
1920
-
1921
-    if (!high_bit_depth && CONFIG_H264CHROMA) {
1922
-        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_ssse3;
1923
-        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_ssse3;
1924
-        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_ssse3;
1925
-        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_ssse3;
1926
-    }
1927 1919
     c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3;
1928 1920
     if (mm_flags & AV_CPU_FLAG_SSE4) // not really sse4, just slow on Conroe
1929 1921
         c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4;
... ...
@@ -1946,20 +1865,6 @@ static av_cold void dsputil_init_sse4(DSPContext *c, AVCodecContext *avctx,
1946 1946
 #endif /* HAVE_SSE4_EXTERNAL */
1947 1947
 }
1948 1948
 
1949
-static av_cold void dsputil_init_avx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
1950
-{
1951
-#if HAVE_AVX_EXTERNAL
1952
-    const int bit_depth = avctx->bits_per_raw_sample;
1953
-
1954
-    if (bit_depth == 10) {
1955
-        if (CONFIG_H264CHROMA) {
1956
-            c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx;
1957
-            c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_avx;
1958
-        }
1959
-    }
1960
-#endif /* HAVE_AVX_EXTERNAL */
1961
-}
1962
-
1963 1949
 av_cold void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
1964 1950
 {
1965 1951
     int mm_flags = av_get_cpu_flags();
... ...
@@ -1990,9 +1895,6 @@ av_cold void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
1990 1990
     if (mm_flags & AV_CPU_FLAG_SSE4)
1991 1991
         dsputil_init_sse4(c, avctx, mm_flags);
1992 1992
 
1993
-    if (mm_flags & AV_CPU_FLAG_AVX)
1994
-        dsputil_init_avx(c, avctx, mm_flags);
1995
-
1996 1993
     if (CONFIG_ENCODERS)
1997 1994
         ff_dsputilenc_init_mmx(c, avctx);
1998 1995
 }
1999 1996
new file mode 100644
... ...
@@ -0,0 +1,116 @@
0
+/*
1
+ * This file is part of Libav.
2
+ *
3
+ * Libav is free software; you can redistribute it and/or
4
+ * modify it under the terms of the GNU Lesser General Public
5
+ * License as published by the Free Software Foundation; either
6
+ * version 2.1 of the License, or (at your option) any later version.
7
+ *
8
+ * Libav is distributed in the hope that it will be useful,
9
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
+ * Lesser General Public License for more details.
12
+ *
13
+ * You should have received a copy of the GNU Lesser General Public
14
+ * License along with Libav; if not, write to the Free Software
15
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+ */
17
+
18
+#include <stdint.h>
19
+
20
+#include "config.h"
21
+#include "libavutil/cpu.h"
22
+#include "libavutil/x86/cpu.h"
23
+#include "libavcodec/h264chroma.h"
24
+
25
+void ff_put_h264_chroma_mc8_rnd_mmx  (uint8_t *dst, uint8_t *src,
26
+                                      int stride, int h, int x, int y);
27
+void ff_avg_h264_chroma_mc8_rnd_mmxext(uint8_t *dst, uint8_t *src,
28
+                                       int stride, int h, int x, int y);
29
+void ff_avg_h264_chroma_mc8_rnd_3dnow(uint8_t *dst, uint8_t *src,
30
+                                      int stride, int h, int x, int y);
31
+
32
+void ff_put_h264_chroma_mc4_mmx      (uint8_t *dst, uint8_t *src,
33
+                                      int stride, int h, int x, int y);
34
+void ff_avg_h264_chroma_mc4_mmxext   (uint8_t *dst, uint8_t *src,
35
+                                      int stride, int h, int x, int y);
36
+void ff_avg_h264_chroma_mc4_3dnow    (uint8_t *dst, uint8_t *src,
37
+                                      int stride, int h, int x, int y);
38
+
39
+void ff_put_h264_chroma_mc2_mmxext   (uint8_t *dst, uint8_t *src,
40
+                                      int stride, int h, int x, int y);
41
+void ff_avg_h264_chroma_mc2_mmxext   (uint8_t *dst, uint8_t *src,
42
+                                      int stride, int h, int x, int y);
43
+
44
+void ff_put_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src,
45
+                                      int stride, int h, int x, int y);
46
+void ff_put_h264_chroma_mc4_ssse3    (uint8_t *dst, uint8_t *src,
47
+                                      int stride, int h, int x, int y);
48
+
49
+void ff_avg_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src,
50
+                                      int stride, int h, int x, int y);
51
+void ff_avg_h264_chroma_mc4_ssse3    (uint8_t *dst, uint8_t *src,
52
+                                      int stride, int h, int x, int y);
53
+
54
+#define CHROMA_MC(OP, NUM, DEPTH, OPT)                                  \
55
+void ff_ ## OP ## _h264_chroma_mc ## NUM ## _ ## DEPTH ## _ ## OPT      \
56
+                                      (uint8_t *dst, uint8_t *src,      \
57
+                                       int stride, int h, int x, int y);
58
+
59
+CHROMA_MC(put, 2, 10, mmxext)
60
+CHROMA_MC(avg, 2, 10, mmxext)
61
+CHROMA_MC(put, 4, 10, mmxext)
62
+CHROMA_MC(avg, 4, 10, mmxext)
63
+CHROMA_MC(put, 8, 10, sse2)
64
+CHROMA_MC(avg, 8, 10, sse2)
65
+CHROMA_MC(put, 8, 10, avx)
66
+CHROMA_MC(avg, 8, 10, avx)
67
+
68
+void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth)
69
+{
70
+    int high_bit_depth = bit_depth > 8;
71
+    int mm_flags       = av_get_cpu_flags();
72
+
73
+    if (EXTERNAL_MMX(mm_flags) && !high_bit_depth) {
74
+        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_mmx;
75
+        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmx;
76
+    }
77
+
78
+    if (EXTERNAL_AMD3DNOW(mm_flags) && !high_bit_depth) {
79
+        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_3dnow;
80
+        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_3dnow;
81
+    }
82
+
83
+    if (EXTERNAL_MMXEXT(mm_flags) && !high_bit_depth) {
84
+        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_mmxext;
85
+        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmxext;
86
+        c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_mmxext;
87
+        c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_mmxext;
88
+    }
89
+
90
+    if (EXTERNAL_MMXEXT(mm_flags) && bit_depth == 10) {
91
+        c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmxext;
92
+        c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmxext;
93
+        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmxext;
94
+        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmxext;
95
+    }
96
+
97
+    if (EXTERNAL_SSE2(mm_flags) && bit_depth == 10) {
98
+        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2;
99
+        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2;
100
+    }
101
+
102
+    if (EXTERNAL_SSSE3(mm_flags) && !high_bit_depth) {
103
+        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_ssse3;
104
+        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_ssse3;
105
+        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_ssse3;
106
+        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_ssse3;
107
+    }
108
+
109
+    if (EXTERNAL_AVX(mm_flags) && bit_depth == 10) {
110
+        // AVX implies !cache64.
111
+        // TODO: Port cache(32|64) detection from x264.
112
+        c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx;
113
+        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_avx;
114
+    }
115
+}