Browse code

Move RV3/4-specific DSP functions into their own context

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>

Kostya Shishkov authored on 2011/08/09 18:00:09
Showing 10 changed files
... ...
@@ -1280,16 +1280,16 @@ static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int
1280 1280
 }
1281 1281
 
1282 1282
 #if CONFIG_RV40_DECODER
1283
-static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1283
+void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1284 1284
     put_pixels16_xy2_8_c(dst, src, stride, 16);
1285 1285
 }
1286
-static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1286
+void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1287 1287
     avg_pixels16_xy2_8_c(dst, src, stride, 16);
1288 1288
 }
1289
-static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1289
+void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1290 1290
     put_pixels8_xy2_8_c(dst, src, stride, 8);
1291 1291
 }
1292
-static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1292
+void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1293 1293
     avg_pixels8_xy2_8_c(dst, src, stride, 8);
1294 1294
 }
1295 1295
 #endif /* CONFIG_RV40_DECODER */
... ...
@@ -2903,16 +2903,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
2903 2903
 #if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
2904 2904
     ff_intrax8dsp_init(c,avctx);
2905 2905
 #endif
2906
-#if CONFIG_RV30_DECODER
2907
-    ff_rv30dsp_init(c,avctx);
2908
-#endif
2909
-#if CONFIG_RV40_DECODER
2910
-    ff_rv40dsp_init(c,avctx);
2911
-    c->put_rv40_qpel_pixels_tab[0][15] = put_rv40_qpel16_mc33_c;
2912
-    c->avg_rv40_qpel_pixels_tab[0][15] = avg_rv40_qpel16_mc33_c;
2913
-    c->put_rv40_qpel_pixels_tab[1][15] = put_rv40_qpel8_mc33_c;
2914
-    c->avg_rv40_qpel_pixels_tab[1][15] = avg_rv40_qpel8_mc33_c;
2915
-#endif
2916 2906
 
2917 2907
     c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
2918 2908
     c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
... ...
@@ -3124,16 +3114,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
3124 3124
             c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
3125 3125
     }
3126 3126
 
3127
-    c->put_rv30_tpel_pixels_tab[0][0] = c->put_h264_qpel_pixels_tab[0][0];
3128
-    c->put_rv30_tpel_pixels_tab[1][0] = c->put_h264_qpel_pixels_tab[1][0];
3129
-    c->avg_rv30_tpel_pixels_tab[0][0] = c->avg_h264_qpel_pixels_tab[0][0];
3130
-    c->avg_rv30_tpel_pixels_tab[1][0] = c->avg_h264_qpel_pixels_tab[1][0];
3131
-
3132
-    c->put_rv40_qpel_pixels_tab[0][0] = c->put_h264_qpel_pixels_tab[0][0];
3133
-    c->put_rv40_qpel_pixels_tab[1][0] = c->put_h264_qpel_pixels_tab[1][0];
3134
-    c->avg_rv40_qpel_pixels_tab[0][0] = c->avg_h264_qpel_pixels_tab[0][0];
3135
-    c->avg_rv40_qpel_pixels_tab[1][0] = c->avg_h264_qpel_pixels_tab[1][0];
3136
-
3137 3127
     switch(c->idct_permutation_type){
3138 3128
     case FF_NO_IDCT_PERM:
3139 3129
         for(i=0; i<64; i++)
... ...
@@ -114,6 +114,12 @@ void ff_vp3_h_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
114 114
 /* EA functions */
115 115
 void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block);
116 116
 
117
+/* RV40 functions */
118
+void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride);
119
+void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride);
120
+void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride);
121
+void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride);
122
+
117 123
 /* 1/2^n downscaling functions from imgconvert.c */
118 124
 void ff_shrink22(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
119 125
 void ff_shrink44(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
... ...
@@ -542,16 +548,6 @@ typedef struct DSPContext {
542 542
     void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min,
543 543
                               int32_t max, unsigned int len);
544 544
 
545
-    /* rv30 functions */
546
-    qpel_mc_func put_rv30_tpel_pixels_tab[4][16];
547
-    qpel_mc_func avg_rv30_tpel_pixels_tab[4][16];
548
-
549
-    /* rv40 functions */
550
-    qpel_mc_func put_rv40_qpel_pixels_tab[4][16];
551
-    qpel_mc_func avg_rv40_qpel_pixels_tab[4][16];
552
-    h264_chroma_mc_func put_rv40_chroma_pixels_tab[3];
553
-    h264_chroma_mc_func avg_rv40_chroma_pixels_tab[3];
554
-
555 545
     op_fill_func fill_block_tab[2];
556 546
 } DSPContext;
557 547
 
... ...
@@ -626,8 +622,6 @@ void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
626 626
 void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx);
627 627
 
628 628
 void ff_dsputil_init_dwt(DSPContext *c);
629
-void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx);
630
-void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx);
631 629
 void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);
632 630
 void ff_mlp_init(DSPContext* c, AVCodecContext *avctx);
633 631
 void ff_mlp_init_x86(DSPContext* c, AVCodecContext *avctx);
... ...
@@ -26,6 +26,7 @@
26 26
 
27 27
 #include "avcodec.h"
28 28
 #include "dsputil.h"
29
+#include "rv34dsp.h"
29 30
 
30 31
 #define RV30_LOWPASS(OPNAME, OP) \
31 32
 static av_unused void OPNAME ## rv30_tpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, const int C1, const int C2){\
... ...
@@ -251,41 +252,46 @@ RV30_MC(put_, 16)
251 251
 RV30_MC(avg_, 8)
252 252
 RV30_MC(avg_, 16)
253 253
 
254
-av_cold void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx) {
255
-    c->put_rv30_tpel_pixels_tab[0][ 0] = c->put_h264_qpel_pixels_tab[0][0];
256
-    c->put_rv30_tpel_pixels_tab[0][ 1] = put_rv30_tpel16_mc10_c;
257
-    c->put_rv30_tpel_pixels_tab[0][ 2] = put_rv30_tpel16_mc20_c;
258
-    c->put_rv30_tpel_pixels_tab[0][ 4] = put_rv30_tpel16_mc01_c;
259
-    c->put_rv30_tpel_pixels_tab[0][ 5] = put_rv30_tpel16_mc11_c;
260
-    c->put_rv30_tpel_pixels_tab[0][ 6] = put_rv30_tpel16_mc21_c;
261
-    c->put_rv30_tpel_pixels_tab[0][ 8] = put_rv30_tpel16_mc02_c;
262
-    c->put_rv30_tpel_pixels_tab[0][ 9] = put_rv30_tpel16_mc12_c;
263
-    c->put_rv30_tpel_pixels_tab[0][10] = put_rv30_tpel16_mc22_c;
264
-    c->avg_rv30_tpel_pixels_tab[0][ 0] = c->avg_h264_qpel_pixels_tab[0][0];
265
-    c->avg_rv30_tpel_pixels_tab[0][ 1] = avg_rv30_tpel16_mc10_c;
266
-    c->avg_rv30_tpel_pixels_tab[0][ 2] = avg_rv30_tpel16_mc20_c;
267
-    c->avg_rv30_tpel_pixels_tab[0][ 4] = avg_rv30_tpel16_mc01_c;
268
-    c->avg_rv30_tpel_pixels_tab[0][ 5] = avg_rv30_tpel16_mc11_c;
269
-    c->avg_rv30_tpel_pixels_tab[0][ 6] = avg_rv30_tpel16_mc21_c;
270
-    c->avg_rv30_tpel_pixels_tab[0][ 8] = avg_rv30_tpel16_mc02_c;
271
-    c->avg_rv30_tpel_pixels_tab[0][ 9] = avg_rv30_tpel16_mc12_c;
272
-    c->avg_rv30_tpel_pixels_tab[0][10] = avg_rv30_tpel16_mc22_c;
273
-    c->put_rv30_tpel_pixels_tab[1][ 0] = c->put_h264_qpel_pixels_tab[1][0];
274
-    c->put_rv30_tpel_pixels_tab[1][ 1] = put_rv30_tpel8_mc10_c;
275
-    c->put_rv30_tpel_pixels_tab[1][ 2] = put_rv30_tpel8_mc20_c;
276
-    c->put_rv30_tpel_pixels_tab[1][ 4] = put_rv30_tpel8_mc01_c;
277
-    c->put_rv30_tpel_pixels_tab[1][ 5] = put_rv30_tpel8_mc11_c;
278
-    c->put_rv30_tpel_pixels_tab[1][ 6] = put_rv30_tpel8_mc21_c;
279
-    c->put_rv30_tpel_pixels_tab[1][ 8] = put_rv30_tpel8_mc02_c;
280
-    c->put_rv30_tpel_pixels_tab[1][ 9] = put_rv30_tpel8_mc12_c;
281
-    c->put_rv30_tpel_pixels_tab[1][10] = put_rv30_tpel8_mc22_c;
282
-    c->avg_rv30_tpel_pixels_tab[1][ 0] = c->avg_h264_qpel_pixels_tab[1][0];
283
-    c->avg_rv30_tpel_pixels_tab[1][ 1] = avg_rv30_tpel8_mc10_c;
284
-    c->avg_rv30_tpel_pixels_tab[1][ 2] = avg_rv30_tpel8_mc20_c;
285
-    c->avg_rv30_tpel_pixels_tab[1][ 4] = avg_rv30_tpel8_mc01_c;
286
-    c->avg_rv30_tpel_pixels_tab[1][ 5] = avg_rv30_tpel8_mc11_c;
287
-    c->avg_rv30_tpel_pixels_tab[1][ 6] = avg_rv30_tpel8_mc21_c;
288
-    c->avg_rv30_tpel_pixels_tab[1][ 8] = avg_rv30_tpel8_mc02_c;
289
-    c->avg_rv30_tpel_pixels_tab[1][ 9] = avg_rv30_tpel8_mc12_c;
290
-    c->avg_rv30_tpel_pixels_tab[1][10] = avg_rv30_tpel8_mc22_c;
254
+av_cold void ff_rv30dsp_init(RV34DSPContext *c, DSPContext* dsp) { /* fills rows [0] and [1] of c's put/avg tables plus chroma entries [0] and [1] */
255
+    c->put_pixels_tab[0][ 0] = dsp->put_h264_qpel_pixels_tab[0][0]; /* column 0 reuses dsp's H.264 qpel entry */
256
+    c->put_pixels_tab[0][ 1] = put_rv30_tpel16_mc10_c; /* row [0]: tpel16 functions; column = x + 4*y for mcXY (columns 3, 7, 11..15 are not assigned here) */
257
+    c->put_pixels_tab[0][ 2] = put_rv30_tpel16_mc20_c;
258
+    c->put_pixels_tab[0][ 4] = put_rv30_tpel16_mc01_c;
259
+    c->put_pixels_tab[0][ 5] = put_rv30_tpel16_mc11_c;
260
+    c->put_pixels_tab[0][ 6] = put_rv30_tpel16_mc21_c;
261
+    c->put_pixels_tab[0][ 8] = put_rv30_tpel16_mc02_c;
262
+    c->put_pixels_tab[0][ 9] = put_rv30_tpel16_mc12_c;
263
+    c->put_pixels_tab[0][10] = put_rv30_tpel16_mc22_c;
264
+    c->avg_pixels_tab[0][ 0] = dsp->avg_h264_qpel_pixels_tab[0][0];
265
+    c->avg_pixels_tab[0][ 1] = avg_rv30_tpel16_mc10_c;
266
+    c->avg_pixels_tab[0][ 2] = avg_rv30_tpel16_mc20_c;
267
+    c->avg_pixels_tab[0][ 4] = avg_rv30_tpel16_mc01_c;
268
+    c->avg_pixels_tab[0][ 5] = avg_rv30_tpel16_mc11_c;
269
+    c->avg_pixels_tab[0][ 6] = avg_rv30_tpel16_mc21_c;
270
+    c->avg_pixels_tab[0][ 8] = avg_rv30_tpel16_mc02_c;
271
+    c->avg_pixels_tab[0][ 9] = avg_rv30_tpel16_mc12_c;
272
+    c->avg_pixels_tab[0][10] = avg_rv30_tpel16_mc22_c;
273
+    c->put_pixels_tab[1][ 0] = dsp->put_h264_qpel_pixels_tab[1][0]; /* row [1]: same column layout with the tpel8 functions */
274
+    c->put_pixels_tab[1][ 1] = put_rv30_tpel8_mc10_c;
275
+    c->put_pixels_tab[1][ 2] = put_rv30_tpel8_mc20_c;
276
+    c->put_pixels_tab[1][ 4] = put_rv30_tpel8_mc01_c;
277
+    c->put_pixels_tab[1][ 5] = put_rv30_tpel8_mc11_c;
278
+    c->put_pixels_tab[1][ 6] = put_rv30_tpel8_mc21_c;
279
+    c->put_pixels_tab[1][ 8] = put_rv30_tpel8_mc02_c;
280
+    c->put_pixels_tab[1][ 9] = put_rv30_tpel8_mc12_c;
281
+    c->put_pixels_tab[1][10] = put_rv30_tpel8_mc22_c;
282
+    c->avg_pixels_tab[1][ 0] = dsp->avg_h264_qpel_pixels_tab[1][0];
283
+    c->avg_pixels_tab[1][ 1] = avg_rv30_tpel8_mc10_c;
284
+    c->avg_pixels_tab[1][ 2] = avg_rv30_tpel8_mc20_c;
285
+    c->avg_pixels_tab[1][ 4] = avg_rv30_tpel8_mc01_c;
286
+    c->avg_pixels_tab[1][ 5] = avg_rv30_tpel8_mc11_c;
287
+    c->avg_pixels_tab[1][ 6] = avg_rv30_tpel8_mc21_c;
288
+    c->avg_pixels_tab[1][ 8] = avg_rv30_tpel8_mc02_c;
289
+    c->avg_pixels_tab[1][ 9] = avg_rv30_tpel8_mc12_c;
290
+    c->avg_pixels_tab[1][10] = avg_rv30_tpel8_mc22_c;
291
+
292
+    c->put_chroma_pixels_tab[0] = dsp->put_h264_chroma_pixels_tab[0]; /* chroma entries are copied from dsp's H.264 chroma tables */
293
+    c->put_chroma_pixels_tab[1] = dsp->put_h264_chroma_pixels_tab[1];
294
+    c->avg_chroma_pixels_tab[0] = dsp->avg_h264_chroma_pixels_tab[0];
295
+    c->avg_chroma_pixels_tab[1] = dsp->avg_h264_chroma_pixels_tab[1];
291 296
 }
... ...
@@ -809,24 +809,18 @@ static void rv34_mc_1mv(RV34DecContext *r, const int block_type,
809 809
                         const int width, const int height, int dir)
810 810
 {
811 811
     rv34_mc(r, block_type, xoff, yoff, mv_off, width, height, dir, r->rv30,
812
-            r->rv30 ? r->s.dsp.put_rv30_tpel_pixels_tab
813
-                    : r->s.dsp.put_rv40_qpel_pixels_tab,
814
-            r->rv30 ? r->s.dsp.put_h264_chroma_pixels_tab
815
-                    : r->s.dsp.put_rv40_chroma_pixels_tab);
812
+            r->rdsp.put_pixels_tab,
813
+            r->rdsp.put_chroma_pixels_tab);
816 814
 }
817 815
 
818 816
 static void rv34_mc_2mv(RV34DecContext *r, const int block_type)
819 817
 {
820 818
     rv34_mc(r, block_type, 0, 0, 0, 2, 2, 0, r->rv30,
821
-            r->rv30 ? r->s.dsp.put_rv30_tpel_pixels_tab
822
-                    : r->s.dsp.put_rv40_qpel_pixels_tab,
823
-            r->rv30 ? r->s.dsp.put_h264_chroma_pixels_tab
824
-                    : r->s.dsp.put_rv40_chroma_pixels_tab);
819
+            r->rdsp.put_pixels_tab,
820
+            r->rdsp.put_chroma_pixels_tab);
825 821
     rv34_mc(r, block_type, 0, 0, 0, 2, 2, 1, r->rv30,
826
-            r->rv30 ? r->s.dsp.avg_rv30_tpel_pixels_tab
827
-                    : r->s.dsp.avg_rv40_qpel_pixels_tab,
828
-            r->rv30 ? r->s.dsp.avg_h264_chroma_pixels_tab
829
-                    : r->s.dsp.avg_rv40_chroma_pixels_tab);
822
+            r->rdsp.avg_pixels_tab,
823
+            r->rdsp.avg_chroma_pixels_tab);
830 824
 }
831 825
 
832 826
 static void rv34_mc_2mv_skip(RV34DecContext *r)
... ...
@@ -835,15 +829,11 @@ static void rv34_mc_2mv_skip(RV34DecContext *r)
835 835
     for(j = 0; j < 2; j++)
836 836
         for(i = 0; i < 2; i++){
837 837
              rv34_mc(r, RV34_MB_P_8x8, i*8, j*8, i+j*r->s.b8_stride, 1, 1, 0, r->rv30,
838
-                    r->rv30 ? r->s.dsp.put_rv30_tpel_pixels_tab
839
-                            : r->s.dsp.put_rv40_qpel_pixels_tab,
840
-                    r->rv30 ? r->s.dsp.put_h264_chroma_pixels_tab
841
-                            : r->s.dsp.put_rv40_chroma_pixels_tab);
838
+                     r->rdsp.put_pixels_tab,
839
+                     r->rdsp.put_chroma_pixels_tab);
842 840
              rv34_mc(r, RV34_MB_P_8x8, i*8, j*8, i+j*r->s.b8_stride, 1, 1, 1, r->rv30,
843
-                    r->rv30 ? r->s.dsp.avg_rv30_tpel_pixels_tab
844
-                            : r->s.dsp.avg_rv40_qpel_pixels_tab,
845
-                    r->rv30 ? r->s.dsp.avg_h264_chroma_pixels_tab
846
-                            : r->s.dsp.avg_rv40_chroma_pixels_tab);
841
+                     r->rdsp.avg_pixels_tab,
842
+                     r->rdsp.avg_chroma_pixels_tab);
847 843
         }
848 844
 }
849 845
 
... ...
@@ -1363,6 +1353,15 @@ av_cold int ff_rv34_decode_init(AVCodecContext *avctx)
1363 1363
 
1364 1364
     ff_h264_pred_init(&r->h, CODEC_ID_RV40, 8);
1365 1365
 
1366
+#if CONFIG_RV30_DECODER
1367
+    if (avctx->codec_id == CODEC_ID_RV30)
1368
+        ff_rv30dsp_init(&r->rdsp, &r->s.dsp);
1369
+#endif
1370
+#if CONFIG_RV40_DECODER
1371
+    if (avctx->codec_id == CODEC_ID_RV40)
1372
+        ff_rv40dsp_init(&r->rdsp, &r->s.dsp);
1373
+#endif
1374
+
1366 1375
     r->intra_types_stride = 4*s->mb_stride + 4;
1367 1376
     r->intra_types_hist = av_malloc(r->intra_types_stride * 4 * 2 * sizeof(*r->intra_types_hist));
1368 1377
     r->intra_types = r->intra_types_hist + r->intra_types_stride * 4;
... ...
@@ -32,6 +32,7 @@
32 32
 #include "mpegvideo.h"
33 33
 
34 34
 #include "h264pred.h"
35
+#include "rv34dsp.h"
35 36
 
36 37
 #define MB_TYPE_SEPARATE_DC 0x01000000
37 38
 #define IS_SEPARATE_DC(a)   ((a) & MB_TYPE_SEPARATE_DC)
... ...
@@ -83,6 +84,7 @@ typedef struct SliceInfo{
83 83
 /** decoder context */
84 84
 typedef struct RV34DecContext{
85 85
     MpegEncContext s;
86
+    RV34DSPContext rdsp;
86 87
     int8_t *intra_types_hist;///< old block types, used for prediction
87 88
     int8_t *intra_types;     ///< block types
88 89
     int    intra_types_stride;///< block types array stride
89 90
new file mode 100644
... ...
@@ -0,0 +1,44 @@
0
+/*
1
+ * RV30/40 decoder motion compensation functions
2
+ * Copyright (c) 2008 Konstantin Shishkov
3
+ *
4
+ * This file is part of Libav.
5
+ *
6
+ * Libav is free software; you can redistribute it and/or
7
+ * modify it under the terms of the GNU Lesser General Public
8
+ * License as published by the Free Software Foundation; either
9
+ * version 2.1 of the License, or (at your option) any later version.
10
+ *
11
+ * Libav is distributed in the hope that it will be useful,
12
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
+ * Lesser General Public License for more details.
15
+ *
16
+ * You should have received a copy of the GNU Lesser General Public
17
+ * License along with Libav; if not, write to the Free Software
18
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
+ */
20
+
21
+/**
22
+ * @file
23
+ * RV30/40 decoder motion compensation functions
24
+ */
25
+
26
+#ifndef AVCODEC_RV34DSP_H
27
+#define AVCODEC_RV34DSP_H
28
+
29
+#include "dsputil.h"
30
+
31
+typedef struct RV34DSPContext { /* function tables filled by ff_rv30dsp_init() or ff_rv40dsp_init() */
32
+    qpel_mc_func put_pixels_tab[4][16]; ///< "put" motion compensation functions; the init code fills row [0] (16-wide names) and row [1] (8-wide names), column = x + 4*y of mcXY
33
+    qpel_mc_func avg_pixels_tab[4][16]; ///< "avg" counterparts, filled with the same row/column layout
34
+    h264_chroma_mc_func put_chroma_pixels_tab[3]; ///< "put" chroma functions; the init code fills [0] and [1] (mc8 and mc4 in the RV40 case)
35
+    h264_chroma_mc_func avg_chroma_pixels_tab[3]; ///< "avg" chroma functions, same layout
36
+} RV34DSPContext;
37
+
38
+void ff_rv30dsp_init(RV34DSPContext *c, DSPContext* dsp);
39
+void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp);
40
+
41
+void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp);
42
+
43
+#endif /* AVCODEC_RV34DSP_H */
... ...
@@ -26,6 +26,7 @@
26 26
 
27 27
 #include "avcodec.h"
28 28
 #include "dsputil.h"
29
+#include "rv34dsp.h"
29 30
 
30 31
 #define RV40_LOWPASS(OPNAME, OP) \
31 32
 static av_unused void OPNAME ## rv40_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
... ...
@@ -284,70 +285,77 @@ static void OPNAME ## rv40_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*a
284 284
 RV40_CHROMA_MC(put_, op_put)
285 285
 RV40_CHROMA_MC(avg_, op_avg)
286 286
 
287
-void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx) {
288
-    c->put_rv40_qpel_pixels_tab[0][ 0] = c->put_h264_qpel_pixels_tab[0][0];
289
-    c->put_rv40_qpel_pixels_tab[0][ 1] = put_rv40_qpel16_mc10_c;
290
-    c->put_rv40_qpel_pixels_tab[0][ 2] = put_rv40_qpel16_mc20_c;
291
-    c->put_rv40_qpel_pixels_tab[0][ 3] = put_rv40_qpel16_mc30_c;
292
-    c->put_rv40_qpel_pixels_tab[0][ 4] = put_rv40_qpel16_mc01_c;
293
-    c->put_rv40_qpel_pixels_tab[0][ 5] = put_rv40_qpel16_mc11_c;
294
-    c->put_rv40_qpel_pixels_tab[0][ 6] = put_rv40_qpel16_mc21_c;
295
-    c->put_rv40_qpel_pixels_tab[0][ 7] = put_rv40_qpel16_mc31_c;
296
-    c->put_rv40_qpel_pixels_tab[0][ 8] = put_rv40_qpel16_mc02_c;
297
-    c->put_rv40_qpel_pixels_tab[0][ 9] = put_rv40_qpel16_mc12_c;
298
-    c->put_rv40_qpel_pixels_tab[0][10] = put_rv40_qpel16_mc22_c;
299
-    c->put_rv40_qpel_pixels_tab[0][11] = put_rv40_qpel16_mc32_c;
300
-    c->put_rv40_qpel_pixels_tab[0][12] = put_rv40_qpel16_mc03_c;
301
-    c->put_rv40_qpel_pixels_tab[0][13] = put_rv40_qpel16_mc13_c;
302
-    c->put_rv40_qpel_pixels_tab[0][14] = put_rv40_qpel16_mc23_c;
303
-    c->avg_rv40_qpel_pixels_tab[0][ 0] = c->avg_h264_qpel_pixels_tab[0][0];
304
-    c->avg_rv40_qpel_pixels_tab[0][ 1] = avg_rv40_qpel16_mc10_c;
305
-    c->avg_rv40_qpel_pixels_tab[0][ 2] = avg_rv40_qpel16_mc20_c;
306
-    c->avg_rv40_qpel_pixels_tab[0][ 3] = avg_rv40_qpel16_mc30_c;
307
-    c->avg_rv40_qpel_pixels_tab[0][ 4] = avg_rv40_qpel16_mc01_c;
308
-    c->avg_rv40_qpel_pixels_tab[0][ 5] = avg_rv40_qpel16_mc11_c;
309
-    c->avg_rv40_qpel_pixels_tab[0][ 6] = avg_rv40_qpel16_mc21_c;
310
-    c->avg_rv40_qpel_pixels_tab[0][ 7] = avg_rv40_qpel16_mc31_c;
311
-    c->avg_rv40_qpel_pixels_tab[0][ 8] = avg_rv40_qpel16_mc02_c;
312
-    c->avg_rv40_qpel_pixels_tab[0][ 9] = avg_rv40_qpel16_mc12_c;
313
-    c->avg_rv40_qpel_pixels_tab[0][10] = avg_rv40_qpel16_mc22_c;
314
-    c->avg_rv40_qpel_pixels_tab[0][11] = avg_rv40_qpel16_mc32_c;
315
-    c->avg_rv40_qpel_pixels_tab[0][12] = avg_rv40_qpel16_mc03_c;
316
-    c->avg_rv40_qpel_pixels_tab[0][13] = avg_rv40_qpel16_mc13_c;
317
-    c->avg_rv40_qpel_pixels_tab[0][14] = avg_rv40_qpel16_mc23_c;
318
-    c->put_rv40_qpel_pixels_tab[1][ 0] = c->put_h264_qpel_pixels_tab[1][0];
319
-    c->put_rv40_qpel_pixels_tab[1][ 1] = put_rv40_qpel8_mc10_c;
320
-    c->put_rv40_qpel_pixels_tab[1][ 2] = put_rv40_qpel8_mc20_c;
321
-    c->put_rv40_qpel_pixels_tab[1][ 3] = put_rv40_qpel8_mc30_c;
322
-    c->put_rv40_qpel_pixels_tab[1][ 4] = put_rv40_qpel8_mc01_c;
323
-    c->put_rv40_qpel_pixels_tab[1][ 5] = put_rv40_qpel8_mc11_c;
324
-    c->put_rv40_qpel_pixels_tab[1][ 6] = put_rv40_qpel8_mc21_c;
325
-    c->put_rv40_qpel_pixels_tab[1][ 7] = put_rv40_qpel8_mc31_c;
326
-    c->put_rv40_qpel_pixels_tab[1][ 8] = put_rv40_qpel8_mc02_c;
327
-    c->put_rv40_qpel_pixels_tab[1][ 9] = put_rv40_qpel8_mc12_c;
328
-    c->put_rv40_qpel_pixels_tab[1][10] = put_rv40_qpel8_mc22_c;
329
-    c->put_rv40_qpel_pixels_tab[1][11] = put_rv40_qpel8_mc32_c;
330
-    c->put_rv40_qpel_pixels_tab[1][12] = put_rv40_qpel8_mc03_c;
331
-    c->put_rv40_qpel_pixels_tab[1][13] = put_rv40_qpel8_mc13_c;
332
-    c->put_rv40_qpel_pixels_tab[1][14] = put_rv40_qpel8_mc23_c;
333
-    c->avg_rv40_qpel_pixels_tab[1][ 0] = c->avg_h264_qpel_pixels_tab[1][0];
334
-    c->avg_rv40_qpel_pixels_tab[1][ 1] = avg_rv40_qpel8_mc10_c;
335
-    c->avg_rv40_qpel_pixels_tab[1][ 2] = avg_rv40_qpel8_mc20_c;
336
-    c->avg_rv40_qpel_pixels_tab[1][ 3] = avg_rv40_qpel8_mc30_c;
337
-    c->avg_rv40_qpel_pixels_tab[1][ 4] = avg_rv40_qpel8_mc01_c;
338
-    c->avg_rv40_qpel_pixels_tab[1][ 5] = avg_rv40_qpel8_mc11_c;
339
-    c->avg_rv40_qpel_pixels_tab[1][ 6] = avg_rv40_qpel8_mc21_c;
340
-    c->avg_rv40_qpel_pixels_tab[1][ 7] = avg_rv40_qpel8_mc31_c;
341
-    c->avg_rv40_qpel_pixels_tab[1][ 8] = avg_rv40_qpel8_mc02_c;
342
-    c->avg_rv40_qpel_pixels_tab[1][ 9] = avg_rv40_qpel8_mc12_c;
343
-    c->avg_rv40_qpel_pixels_tab[1][10] = avg_rv40_qpel8_mc22_c;
344
-    c->avg_rv40_qpel_pixels_tab[1][11] = avg_rv40_qpel8_mc32_c;
345
-    c->avg_rv40_qpel_pixels_tab[1][12] = avg_rv40_qpel8_mc03_c;
346
-    c->avg_rv40_qpel_pixels_tab[1][13] = avg_rv40_qpel8_mc13_c;
347
-    c->avg_rv40_qpel_pixels_tab[1][14] = avg_rv40_qpel8_mc23_c;
287
+av_cold void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp) { /* fills rows [0] and [1] of c's put/avg tables and chroma entries [0] and [1] */
288
+    c->put_pixels_tab[0][ 0] = dsp->put_h264_qpel_pixels_tab[0][0]; /* column 0 reuses dsp's H.264 qpel entry */
289
+    c->put_pixels_tab[0][ 1] = put_rv40_qpel16_mc10_c; /* row [0]: qpel16 functions; column = x + 4*y for mcXY */
290
+    c->put_pixels_tab[0][ 2] = put_rv40_qpel16_mc20_c;
291
+    c->put_pixels_tab[0][ 3] = put_rv40_qpel16_mc30_c;
292
+    c->put_pixels_tab[0][ 4] = put_rv40_qpel16_mc01_c;
293
+    c->put_pixels_tab[0][ 5] = put_rv40_qpel16_mc11_c;
294
+    c->put_pixels_tab[0][ 6] = put_rv40_qpel16_mc21_c;
295
+    c->put_pixels_tab[0][ 7] = put_rv40_qpel16_mc31_c;
296
+    c->put_pixels_tab[0][ 8] = put_rv40_qpel16_mc02_c;
297
+    c->put_pixels_tab[0][ 9] = put_rv40_qpel16_mc12_c;
298
+    c->put_pixels_tab[0][10] = put_rv40_qpel16_mc22_c;
299
+    c->put_pixels_tab[0][11] = put_rv40_qpel16_mc32_c;
300
+    c->put_pixels_tab[0][12] = put_rv40_qpel16_mc03_c;
301
+    c->put_pixels_tab[0][13] = put_rv40_qpel16_mc13_c;
302
+    c->put_pixels_tab[0][14] = put_rv40_qpel16_mc23_c;
303
+    c->put_pixels_tab[0][15] = ff_put_rv40_qpel16_mc33_c; /* mc33 uses the non-static ff_* function instead of a file-local one */
304
+    c->avg_pixels_tab[0][ 0] = dsp->avg_h264_qpel_pixels_tab[0][0];
305
+    c->avg_pixels_tab[0][ 1] = avg_rv40_qpel16_mc10_c;
306
+    c->avg_pixels_tab[0][ 2] = avg_rv40_qpel16_mc20_c;
307
+    c->avg_pixels_tab[0][ 3] = avg_rv40_qpel16_mc30_c;
308
+    c->avg_pixels_tab[0][ 4] = avg_rv40_qpel16_mc01_c;
309
+    c->avg_pixels_tab[0][ 5] = avg_rv40_qpel16_mc11_c;
310
+    c->avg_pixels_tab[0][ 6] = avg_rv40_qpel16_mc21_c;
311
+    c->avg_pixels_tab[0][ 7] = avg_rv40_qpel16_mc31_c;
312
+    c->avg_pixels_tab[0][ 8] = avg_rv40_qpel16_mc02_c;
313
+    c->avg_pixels_tab[0][ 9] = avg_rv40_qpel16_mc12_c;
314
+    c->avg_pixels_tab[0][10] = avg_rv40_qpel16_mc22_c;
315
+    c->avg_pixels_tab[0][11] = avg_rv40_qpel16_mc32_c;
316
+    c->avg_pixels_tab[0][12] = avg_rv40_qpel16_mc03_c;
317
+    c->avg_pixels_tab[0][13] = avg_rv40_qpel16_mc13_c;
318
+    c->avg_pixels_tab[0][14] = avg_rv40_qpel16_mc23_c;
319
+    c->avg_pixels_tab[0][15] = ff_avg_rv40_qpel16_mc33_c;
320
+    c->put_pixels_tab[1][ 0] = dsp->put_h264_qpel_pixels_tab[1][0]; /* row [1]: same column layout with the qpel8 functions */
321
+    c->put_pixels_tab[1][ 1] = put_rv40_qpel8_mc10_c;
322
+    c->put_pixels_tab[1][ 2] = put_rv40_qpel8_mc20_c;
323
+    c->put_pixels_tab[1][ 3] = put_rv40_qpel8_mc30_c;
324
+    c->put_pixels_tab[1][ 4] = put_rv40_qpel8_mc01_c;
325
+    c->put_pixels_tab[1][ 5] = put_rv40_qpel8_mc11_c;
326
+    c->put_pixels_tab[1][ 6] = put_rv40_qpel8_mc21_c;
327
+    c->put_pixels_tab[1][ 7] = put_rv40_qpel8_mc31_c;
328
+    c->put_pixels_tab[1][ 8] = put_rv40_qpel8_mc02_c;
329
+    c->put_pixels_tab[1][ 9] = put_rv40_qpel8_mc12_c;
330
+    c->put_pixels_tab[1][10] = put_rv40_qpel8_mc22_c;
331
+    c->put_pixels_tab[1][11] = put_rv40_qpel8_mc32_c;
332
+    c->put_pixels_tab[1][12] = put_rv40_qpel8_mc03_c;
333
+    c->put_pixels_tab[1][13] = put_rv40_qpel8_mc13_c;
334
+    c->put_pixels_tab[1][14] = put_rv40_qpel8_mc23_c;
335
+    c->put_pixels_tab[1][15] = ff_put_rv40_qpel8_mc33_c;
336
+    c->avg_pixels_tab[1][ 0] = dsp->avg_h264_qpel_pixels_tab[1][0];
337
+    c->avg_pixels_tab[1][ 1] = avg_rv40_qpel8_mc10_c;
338
+    c->avg_pixels_tab[1][ 2] = avg_rv40_qpel8_mc20_c;
339
+    c->avg_pixels_tab[1][ 3] = avg_rv40_qpel8_mc30_c;
340
+    c->avg_pixels_tab[1][ 4] = avg_rv40_qpel8_mc01_c;
341
+    c->avg_pixels_tab[1][ 5] = avg_rv40_qpel8_mc11_c;
342
+    c->avg_pixels_tab[1][ 6] = avg_rv40_qpel8_mc21_c;
343
+    c->avg_pixels_tab[1][ 7] = avg_rv40_qpel8_mc31_c;
344
+    c->avg_pixels_tab[1][ 8] = avg_rv40_qpel8_mc02_c;
345
+    c->avg_pixels_tab[1][ 9] = avg_rv40_qpel8_mc12_c;
346
+    c->avg_pixels_tab[1][10] = avg_rv40_qpel8_mc22_c;
347
+    c->avg_pixels_tab[1][11] = avg_rv40_qpel8_mc32_c;
348
+    c->avg_pixels_tab[1][12] = avg_rv40_qpel8_mc03_c;
349
+    c->avg_pixels_tab[1][13] = avg_rv40_qpel8_mc13_c;
350
+    c->avg_pixels_tab[1][14] = avg_rv40_qpel8_mc23_c;
351
+    c->avg_pixels_tab[1][15] = ff_avg_rv40_qpel8_mc33_c;
348 352
 
349
-    c->put_rv40_chroma_pixels_tab[0] = put_rv40_chroma_mc8_c;
350
-    c->put_rv40_chroma_pixels_tab[1] = put_rv40_chroma_mc4_c;
351
-    c->avg_rv40_chroma_pixels_tab[0] = avg_rv40_chroma_mc8_c;
352
-    c->avg_rv40_chroma_pixels_tab[1] = avg_rv40_chroma_mc4_c;
353
+    c->put_chroma_pixels_tab[0] = put_rv40_chroma_mc8_c; /* C chroma functions: [0] = mc8, [1] = mc4 */
354
+    c->put_chroma_pixels_tab[1] = put_rv40_chroma_mc4_c;
355
+    c->avg_chroma_pixels_tab[0] = avg_rv40_chroma_mc8_c;
356
+    c->avg_chroma_pixels_tab[1] = avg_rv40_chroma_mc4_c;
357
+
358
+    if (HAVE_MMX) /* NOTE(review): presumably a build-time 0/1 constant so the call is dropped on non-x86 builds; confirm */
359
+        ff_rv40dsp_init_x86(c, dsp); /* may replace the chroma entries set above */
353 360
 }
... ...
@@ -21,6 +21,8 @@ YASM-OBJS-$(CONFIG_H264PRED)           += x86/h264_intrapred.o          \
21 21
                                           x86/h264_intrapred_10bit.o
22 22
 MMX-OBJS-$(CONFIG_H264PRED)            += x86/h264_intrapred_init.o
23 23
 
24
+MMX-OBJS-$(CONFIG_RV40_DECODER)        += x86/rv40dsp.o
25
+
24 26
 YASM-OBJS-$(CONFIG_VC1_DECODER)        += x86/vc1dsp_yasm.o
25 27
 
26 28
 MMX-OBJS-$(CONFIG_AC3DSP)              += x86/ac3dsp_mmx.o
... ...
@@ -1895,29 +1895,17 @@ PREFETCH(prefetch_3dnow, prefetch)
1895 1895
 
1896 1896
 void ff_put_h264_chroma_mc8_mmx_rnd   (uint8_t *dst, uint8_t *src,
1897 1897
                                        int stride, int h, int x, int y);
1898
-void ff_put_rv40_chroma_mc8_mmx       (uint8_t *dst, uint8_t *src,
1899
-                                       int stride, int h, int x, int y);
1900 1898
 void ff_avg_h264_chroma_mc8_mmx2_rnd  (uint8_t *dst, uint8_t *src,
1901 1899
                                        int stride, int h, int x, int y);
1902
-void ff_avg_rv40_chroma_mc8_mmx2      (uint8_t *dst, uint8_t *src,
1903
-                                       int stride, int h, int x, int y);
1904 1900
 void ff_avg_h264_chroma_mc8_3dnow_rnd (uint8_t *dst, uint8_t *src,
1905 1901
                                        int stride, int h, int x, int y);
1906
-void ff_avg_rv40_chroma_mc8_3dnow     (uint8_t *dst, uint8_t *src,
1907
-                                       int stride, int h, int x, int y);
1908 1902
 
1909 1903
 void ff_put_h264_chroma_mc4_mmx       (uint8_t *dst, uint8_t *src,
1910 1904
                                        int stride, int h, int x, int y);
1911
-void ff_put_rv40_chroma_mc4_mmx       (uint8_t *dst, uint8_t *src,
1912
-                                       int stride, int h, int x, int y);
1913 1905
 void ff_avg_h264_chroma_mc4_mmx2      (uint8_t *dst, uint8_t *src,
1914 1906
                                        int stride, int h, int x, int y);
1915
-void ff_avg_rv40_chroma_mc4_mmx2      (uint8_t *dst, uint8_t *src,
1916
-                                       int stride, int h, int x, int y);
1917 1907
 void ff_avg_h264_chroma_mc4_3dnow     (uint8_t *dst, uint8_t *src,
1918 1908
                                        int stride, int h, int x, int y);
1919
-void ff_avg_rv40_chroma_mc4_3dnow     (uint8_t *dst, uint8_t *src,
1920
-                                       int stride, int h, int x, int y);
1921 1909
 
1922 1910
 void ff_put_h264_chroma_mc2_mmx2      (uint8_t *dst, uint8_t *src,
1923 1911
                                        int stride, int h, int x, int y);
... ...
@@ -2573,9 +2561,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2573 2573
         c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx;
2574 2574
         }
2575 2575
 
2576
-        c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx;
2577
-        c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx;
2578
-
2579 2576
         c->vector_clip_int32 = ff_vector_clip_int32_mmx;
2580 2577
 #endif
2581 2578
 
... ...
@@ -2675,9 +2660,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2675 2675
             SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2, );
2676 2676
 
2677 2677
 #if HAVE_YASM
2678
-            c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2;
2679
-            c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_mmx2;
2680
-
2681 2678
             if (!high_bit_depth) {
2682 2679
             c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd;
2683 2680
             c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2;
... ...
@@ -2760,8 +2742,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2760 2760
             c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow;
2761 2761
             }
2762 2762
 
2763
-            c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_3dnow;
2764
-            c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_3dnow;
2765 2763
 #endif
2766 2764
         }
2767 2765
 
2768 2766
new file mode 100644
... ...
@@ -0,0 +1,60 @@
0
+/*
1
+ * RV40 decoder motion compensation functions x86-optimised
2
+ * Copyright (c) 2008 Konstantin Shishkov
3
+ *
4
+ * This file is part of Libav.
5
+ *
6
+ * Libav is free software; you can redistribute it and/or
7
+ * modify it under the terms of the GNU Lesser General Public
8
+ * License as published by the Free Software Foundation; either
9
+ * version 2.1 of the License, or (at your option) any later version.
10
+ *
11
+ * Libav is distributed in the hope that it will be useful,
12
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
+ * Lesser General Public License for more details.
15
+ *
16
+ * You should have received a copy of the GNU Lesser General Public
17
+ * License along with Libav; if not, write to the Free Software
18
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
+ */
20
+
21
+/**
22
+ * @file
23
+ * RV40 decoder motion compensation functions x86-optimised
24
+ */
25
+
26
+#include "libavcodec/rv34dsp.h"
27
+
28
+void ff_put_rv40_chroma_mc8_mmx  (uint8_t *dst, uint8_t *src,
29
+                                  int stride, int h, int x, int y);
30
+void ff_avg_rv40_chroma_mc8_mmx2 (uint8_t *dst, uint8_t *src,
31
+                                  int stride, int h, int x, int y);
32
+void ff_avg_rv40_chroma_mc8_3dnow(uint8_t *dst, uint8_t *src,
33
+                                  int stride, int h, int x, int y);
34
+
35
+void ff_put_rv40_chroma_mc4_mmx  (uint8_t *dst, uint8_t *src,
36
+                                  int stride, int h, int x, int y);
37
+void ff_avg_rv40_chroma_mc4_mmx2 (uint8_t *dst, uint8_t *src,
38
+                                  int stride, int h, int x, int y);
39
+void ff_avg_rv40_chroma_mc4_3dnow(uint8_t *dst, uint8_t *src,
40
+                                  int stride, int h, int x, int y);
41
+
42
+av_cold void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp) /* replaces RV40 chroma entries in c according to the CPU flags; dsp is not used in this body */
43
+{
44
+    av_unused int mm_flags = av_get_cpu_flags(); /* av_unused: only read inside the HAVE_YASM section below */
45
+
46
+#if HAVE_YASM /* the functions assigned below are only prototyped in this file; presumably implemented in yasm assembly - confirm */
47
+    if (mm_flags & AV_CPU_FLAG_MMX) {
48
+        c->put_chroma_pixels_tab[0] = ff_put_rv40_chroma_mc8_mmx;
49
+        c->put_chroma_pixels_tab[1] = ff_put_rv40_chroma_mc4_mmx;
50
+    }
51
+    if (mm_flags & AV_CPU_FLAG_MMX2) { /* avg variants: MMX2 is checked first, 3DNow! only when MMX2 is absent */
52
+        c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_mmx2;
53
+        c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_mmx2;
54
+    } else if (mm_flags & AV_CPU_FLAG_3DNOW) {
55
+        c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_3dnow;
56
+        c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_3dnow;
57
+    }
58
+#endif
59
+}