Browse code

dsputil: move VC1-specific stuff into VC1DSPContext. (cherry picked from commit 12802ec0601c3bd7b9c7a2503518e28fd5e7d744)

Ronald S. Bultje authored on 2011/02/18 04:45:03
Showing 13 changed files
... ...
@@ -1600,54 +1600,6 @@ H264_CHROMA_MC(avg_       , op_avg)
1600 1600
 #undef op_avg
1601 1601
 #undef op_put
1602 1602
 
1603
-static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
1604
-    const int A=(8-x)*(8-y);
1605
-    const int B=(  x)*(8-y);
1606
-    const int C=(8-x)*(  y);
1607
-    const int D=(  x)*(  y);
1608
-    int i;
1609
-
1610
-    assert(x<8 && y<8 && x>=0 && y>=0);
1611
-
1612
-    for(i=0; i<h; i++)
1613
-    {
1614
-        dst[0] = (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + 32 - 4) >> 6;
1615
-        dst[1] = (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + 32 - 4) >> 6;
1616
-        dst[2] = (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + 32 - 4) >> 6;
1617
-        dst[3] = (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + 32 - 4) >> 6;
1618
-        dst[4] = (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + 32 - 4) >> 6;
1619
-        dst[5] = (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + 32 - 4) >> 6;
1620
-        dst[6] = (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + 32 - 4) >> 6;
1621
-        dst[7] = (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + 32 - 4) >> 6;
1622
-        dst+= stride;
1623
-        src+= stride;
1624
-    }
1625
-}
1626
-
1627
-static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
1628
-    const int A=(8-x)*(8-y);
1629
-    const int B=(  x)*(8-y);
1630
-    const int C=(8-x)*(  y);
1631
-    const int D=(  x)*(  y);
1632
-    int i;
1633
-
1634
-    assert(x<8 && y<8 && x>=0 && y>=0);
1635
-
1636
-    for(i=0; i<h; i++)
1637
-    {
1638
-        dst[0] = avg2(dst[0], ((A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + 32 - 4) >> 6));
1639
-        dst[1] = avg2(dst[1], ((A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + 32 - 4) >> 6));
1640
-        dst[2] = avg2(dst[2], ((A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + 32 - 4) >> 6));
1641
-        dst[3] = avg2(dst[3], ((A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + 32 - 4) >> 6));
1642
-        dst[4] = avg2(dst[4], ((A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + 32 - 4) >> 6));
1643
-        dst[5] = avg2(dst[5], ((A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + 32 - 4) >> 6));
1644
-        dst[6] = avg2(dst[6], ((A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + 32 - 4) >> 6));
1645
-        dst[7] = avg2(dst[7], ((A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + 32 - 4) >> 6));
1646
-        dst+= stride;
1647
-        src+= stride;
1648
-    }
1649
-}
1650
-
1651 1603
 #define QPEL_MC(r, OPNAME, RND, OP) \
1652 1604
 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1653 1605
     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
... ...
@@ -4301,17 +4253,12 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
4301 4301
     c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
4302 4302
     c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
4303 4303
     c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
4304
-    c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_no_rnd_vc1_chroma_mc8_c;
4305
-    c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_no_rnd_vc1_chroma_mc8_c;
4306 4304
 
4307 4305
     c->draw_edges = draw_edges_c;
4308 4306
 
4309 4307
 #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
4310 4308
     ff_mlp_init(c, avctx);
4311 4309
 #endif
4312
-#if CONFIG_VC1_DECODER
4313
-    ff_vc1dsp_init(c,avctx);
4314
-#endif
4315 4310
 #if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
4316 4311
     ff_intrax8dsp_init(c,avctx);
4317 4312
 #endif
... ...
@@ -341,9 +341,6 @@ typedef struct DSPContext {
341 341
      */
342 342
     h264_chroma_mc_func put_h264_chroma_pixels_tab[3];
343 343
     h264_chroma_mc_func avg_h264_chroma_pixels_tab[3];
344
-    /* This is really one func used in VC-1 decoding */
345
-    h264_chroma_mc_func put_no_rnd_vc1_chroma_pixels_tab[3];
346
-    h264_chroma_mc_func avg_no_rnd_vc1_chroma_pixels_tab[3];
347 344
 
348 345
     qpel_mc_func put_h264_qpel_pixels_tab[4][16];
349 346
     qpel_mc_func avg_h264_qpel_pixels_tab[4][16];
... ...
@@ -503,29 +500,6 @@ typedef struct DSPContext {
503 503
                                unsigned int filter_shift, int32_t mask, int blocksize,
504 504
                                int32_t *sample_buffer);
505 505
 
506
-    /* vc1 functions */
507
-    void (*vc1_inv_trans_8x8)(DCTELEM *b);
508
-    void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block);
509
-    void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block);
510
-    void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block);
511
-    void (*vc1_inv_trans_8x8_dc)(uint8_t *dest, int line_size, DCTELEM *block);
512
-    void (*vc1_inv_trans_8x4_dc)(uint8_t *dest, int line_size, DCTELEM *block);
513
-    void (*vc1_inv_trans_4x8_dc)(uint8_t *dest, int line_size, DCTELEM *block);
514
-    void (*vc1_inv_trans_4x4_dc)(uint8_t *dest, int line_size, DCTELEM *block);
515
-    void (*vc1_v_overlap)(uint8_t* src, int stride);
516
-    void (*vc1_h_overlap)(uint8_t* src, int stride);
517
-    void (*vc1_v_loop_filter4)(uint8_t *src, int stride, int pq);
518
-    void (*vc1_h_loop_filter4)(uint8_t *src, int stride, int pq);
519
-    void (*vc1_v_loop_filter8)(uint8_t *src, int stride, int pq);
520
-    void (*vc1_h_loop_filter8)(uint8_t *src, int stride, int pq);
521
-    void (*vc1_v_loop_filter16)(uint8_t *src, int stride, int pq);
522
-    void (*vc1_h_loop_filter16)(uint8_t *src, int stride, int pq);
523
-    /* put 8x8 block with bicubic interpolation and quarterpel precision
524
-     * last argument is actually round value instead of height
525
-     */
526
-    op_pixels_func put_vc1_mspel_pixels_tab[16];
527
-    op_pixels_func avg_vc1_mspel_pixels_tab[16];
528
-
529 506
     /* intrax8 functions */
530 507
     void (*x8_spatial_compensation[12])(uint8_t *src , uint8_t *dst, int linesize);
531 508
     void (*x8_setup_spatial_compensation)(uint8_t *src, uint8_t *dst, int linesize,
... ...
@@ -629,7 +603,6 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx);
629 629
 void ff_dsputil_init_dwt(DSPContext *c);
630 630
 void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx);
631 631
 void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx);
632
-void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx);
633 632
 void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);
634 633
 void ff_mlp_init(DSPContext* c, AVCodecContext *avctx);
635 634
 void ff_mlp_init_x86(DSPContext* c, AVCodecContext *avctx);
... ...
@@ -43,7 +43,6 @@ void ff_vp3_idct_add_altivec(uint8_t *dest, int line_size, DCTELEM *block);
43 43
 void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx);
44 44
 
45 45
 void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx);
46
-void vc1dsp_init_altivec(DSPContext* c, AVCodecContext *avctx);
47 46
 void float_init_altivec(DSPContext* c, AVCodecContext *avctx);
48 47
 void int_init_altivec(DSPContext* c, AVCodecContext *avctx);
49 48
 
... ...
@@ -171,8 +171,6 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
171 171
 
172 172
     if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
173 173
         dsputil_init_altivec(c, avctx);
174
-        if(CONFIG_VC1_DECODER)
175
-            vc1dsp_init_altivec(c, avctx);
176 174
         float_init_altivec(c, avctx);
177 175
         int_init_altivec(c, avctx);
178 176
         c->gmc1 = gmc1_altivec;
... ...
@@ -322,7 +322,11 @@ static void vc1_inv_trans_8x4_altivec(uint8_t *dest, int stride, DCTELEM *block)
322 322
 }
323 323
 
324 324
 
325
-void vc1dsp_init_altivec(DSPContext* dsp, AVCodecContext *avctx) {
325
+void ff_vc1dsp_init_altivec(VC1DSPContext* dsp)
326
+{
327
+    if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
328
+        return;
329
+
326 330
     dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_altivec;
327 331
     dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_altivec;
328 332
 }
... ...
@@ -337,14 +337,14 @@ int vc1_decode_sequence_header(AVCodecContext *avctx, VC1Context *v, GetBitConte
337 337
     v->res_fasttx = get_bits1(gb);
338 338
     if (!v->res_fasttx)
339 339
     {
340
-        v->s.dsp.vc1_inv_trans_8x8 = ff_simple_idct;
341
-        v->s.dsp.vc1_inv_trans_8x4 = ff_simple_idct84_add;
342
-        v->s.dsp.vc1_inv_trans_4x8 = ff_simple_idct48_add;
343
-        v->s.dsp.vc1_inv_trans_4x4 = ff_simple_idct44_add;
344
-        v->s.dsp.vc1_inv_trans_8x8_dc = ff_simple_idct_add;
345
-        v->s.dsp.vc1_inv_trans_8x4_dc = ff_simple_idct84_add;
346
-        v->s.dsp.vc1_inv_trans_4x8_dc = ff_simple_idct48_add;
347
-        v->s.dsp.vc1_inv_trans_4x4_dc = ff_simple_idct44_add;
340
+        v->vc1dsp.vc1_inv_trans_8x8 = ff_simple_idct;
341
+        v->vc1dsp.vc1_inv_trans_8x4 = ff_simple_idct84_add;
342
+        v->vc1dsp.vc1_inv_trans_4x8 = ff_simple_idct48_add;
343
+        v->vc1dsp.vc1_inv_trans_4x4 = ff_simple_idct44_add;
344
+        v->vc1dsp.vc1_inv_trans_8x8_dc = ff_simple_idct_add;
345
+        v->vc1dsp.vc1_inv_trans_8x4_dc = ff_simple_idct84_add;
346
+        v->vc1dsp.vc1_inv_trans_4x8_dc = ff_simple_idct48_add;
347
+        v->vc1dsp.vc1_inv_trans_4x4_dc = ff_simple_idct44_add;
348 348
     }
349 349
 
350 350
     v->fastuvmc =  get_bits1(gb); //common
... ...
@@ -26,6 +26,7 @@
26 26
 #include "avcodec.h"
27 27
 #include "mpegvideo.h"
28 28
 #include "intrax8.h"
29
+#include "vc1dsp.h"
29 30
 
30 31
 /** Markers used in VC-1 AP frame data */
31 32
 //@{
... ...
@@ -155,6 +156,7 @@ enum COTypes {
155 155
 typedef struct VC1Context{
156 156
     MpegEncContext s;
157 157
     IntraX8Context x8;
158
+    VC1DSPContext vc1dsp;
158 159
 
159 160
     int bits;
160 161
 
... ...
@@ -160,29 +160,30 @@ enum Imode {
160 160
 
161 161
 /** @} */ //Bitplane group
162 162
 
163
-static void vc1_loop_filter_iblk(MpegEncContext *s, int pq)
163
+static void vc1_loop_filter_iblk(VC1Context *v, int pq)
164 164
 {
165
+    MpegEncContext *s = &v->s;
165 166
     int j;
166 167
     if (!s->first_slice_line) {
167
-        s->dsp.vc1_v_loop_filter16(s->dest[0], s->linesize, pq);
168
+        v->vc1dsp.vc1_v_loop_filter16(s->dest[0], s->linesize, pq);
168 169
         if (s->mb_x)
169
-            s->dsp.vc1_h_loop_filter16(s->dest[0] - 16*s->linesize, s->linesize, pq);
170
-        s->dsp.vc1_h_loop_filter16(s->dest[0] - 16*s->linesize+8, s->linesize, pq);
170
+            v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16*s->linesize, s->linesize, pq);
171
+        v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16*s->linesize+8, s->linesize, pq);
171 172
         for(j = 0; j < 2; j++){
172
-            s->dsp.vc1_v_loop_filter8(s->dest[j+1], s->uvlinesize, pq);
173
+            v->vc1dsp.vc1_v_loop_filter8(s->dest[j+1], s->uvlinesize, pq);
173 174
             if (s->mb_x)
174
-                s->dsp.vc1_h_loop_filter8(s->dest[j+1]-8*s->uvlinesize, s->uvlinesize, pq);
175
+                v->vc1dsp.vc1_h_loop_filter8(s->dest[j+1]-8*s->uvlinesize, s->uvlinesize, pq);
175 176
         }
176 177
     }
177
-    s->dsp.vc1_v_loop_filter16(s->dest[0] + 8*s->linesize, s->linesize, pq);
178
+    v->vc1dsp.vc1_v_loop_filter16(s->dest[0] + 8*s->linesize, s->linesize, pq);
178 179
 
179 180
     if (s->mb_y == s->mb_height-1) {
180 181
         if (s->mb_x) {
181
-            s->dsp.vc1_h_loop_filter16(s->dest[0], s->linesize, pq);
182
-            s->dsp.vc1_h_loop_filter8(s->dest[1], s->uvlinesize, pq);
183
-            s->dsp.vc1_h_loop_filter8(s->dest[2], s->uvlinesize, pq);
182
+            v->vc1dsp.vc1_h_loop_filter16(s->dest[0], s->linesize, pq);
183
+            v->vc1dsp.vc1_h_loop_filter8(s->dest[1], s->uvlinesize, pq);
184
+            v->vc1dsp.vc1_h_loop_filter8(s->dest[2], s->uvlinesize, pq);
184 185
         }
185
-        s->dsp.vc1_h_loop_filter16(s->dest[0] + 8, s->linesize, pq);
186
+        v->vc1dsp.vc1_h_loop_filter16(s->dest[0] + 8, s->linesize, pq);
186 187
     }
187 188
 }
188 189
 
... ...
@@ -342,11 +343,11 @@ static void vc1_mc_1mv(VC1Context *v, int dir)
342 342
 
343 343
     if(s->mspel) {
344 344
         dxy = ((my & 3) << 2) | (mx & 3);
345
-        dsp->put_vc1_mspel_pixels_tab[dxy](s->dest[0]    , srcY    , s->linesize, v->rnd);
346
-        dsp->put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8, srcY + 8, s->linesize, v->rnd);
345
+        v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0]    , srcY    , s->linesize, v->rnd);
346
+        v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8, srcY + 8, s->linesize, v->rnd);
347 347
         srcY += s->linesize * 8;
348
-        dsp->put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize    , srcY    , s->linesize, v->rnd);
349
-        dsp->put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize + 8, srcY + 8, s->linesize, v->rnd);
348
+        v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize    , srcY    , s->linesize, v->rnd);
349
+        v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize + 8, srcY + 8, s->linesize, v->rnd);
350 350
     } else { // hpel mc - always used for luma
351 351
         dxy = (my & 2) | ((mx & 2) >> 1);
352 352
 
... ...
@@ -364,8 +365,8 @@ static void vc1_mc_1mv(VC1Context *v, int dir)
364 364
         dsp->put_h264_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
365 365
         dsp->put_h264_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
366 366
     }else{
367
-        dsp->put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
368
-        dsp->put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
367
+        v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
368
+        v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
369 369
     }
370 370
 }
371 371
 
... ...
@@ -433,7 +434,7 @@ static void vc1_mc_4mv_luma(VC1Context *v, int n)
433 433
 
434 434
     if(s->mspel) {
435 435
         dxy = ((my & 3) << 2) | (mx & 3);
436
-        dsp->put_vc1_mspel_pixels_tab[dxy](s->dest[0] + off, srcY, s->linesize, v->rnd);
436
+        v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + off, srcY, s->linesize, v->rnd);
437 437
     } else { // hpel mc - always used for luma
438 438
         dxy = (my & 2) | ((mx & 2) >> 1);
439 439
         if(!v->rnd)
... ...
@@ -583,8 +584,8 @@ static void vc1_mc_4mv_chroma(VC1Context *v)
583 583
         dsp->put_h264_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
584 584
         dsp->put_h264_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
585 585
     }else{
586
-        dsp->put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
587
-        dsp->put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
586
+        v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
587
+        v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
588 588
     }
589 589
 }
590 590
 
... ...
@@ -906,11 +907,11 @@ static void vc1_interp_mc(VC1Context *v)
906 906
 
907 907
     if(s->mspel) {
908 908
         dxy = ((my & 3) << 2) | (mx & 3);
909
-        dsp->avg_vc1_mspel_pixels_tab[dxy](s->dest[0]    , srcY    , s->linesize, v->rnd);
910
-        dsp->avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8, srcY + 8, s->linesize, v->rnd);
909
+        v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0]    , srcY    , s->linesize, v->rnd);
910
+        v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8, srcY + 8, s->linesize, v->rnd);
911 911
         srcY += s->linesize * 8;
912
-        dsp->avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize    , srcY    , s->linesize, v->rnd);
913
-        dsp->avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize + 8, srcY + 8, s->linesize, v->rnd);
912
+        v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize    , srcY    , s->linesize, v->rnd);
913
+        v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize + 8, srcY + 8, s->linesize, v->rnd);
914 914
     } else { // hpel mc
915 915
         dxy = (my & 2) | ((mx & 2) >> 1);
916 916
 
... ...
@@ -928,8 +929,8 @@ static void vc1_interp_mc(VC1Context *v)
928 928
         dsp->avg_h264_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
929 929
         dsp->avg_h264_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
930 930
     }else{
931
-        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
932
-        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
931
+        v->vc1dsp.avg_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
932
+        v->vc1dsp.avg_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
933 933
     }
934 934
 }
935 935
 
... ...
@@ -2039,15 +2040,15 @@ static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan
2039 2039
         }
2040 2040
         if(!skip_block){
2041 2041
             if(i==1)
2042
-                s->dsp.vc1_inv_trans_8x8_dc(dst, linesize, block);
2042
+                v->vc1dsp.vc1_inv_trans_8x8_dc(dst, linesize, block);
2043 2043
             else{
2044
-                s->dsp.vc1_inv_trans_8x8(block);
2044
+                v->vc1dsp.vc1_inv_trans_8x8(block);
2045 2045
                 s->dsp.add_pixels_clamped(block, dst, linesize);
2046 2046
             }
2047 2047
             if(apply_filter && cbp_top  & 0xC)
2048
-                s->dsp.vc1_v_loop_filter8(dst, linesize, v->pq);
2048
+                v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq);
2049 2049
             if(apply_filter && cbp_left & 0xA)
2050
-                s->dsp.vc1_h_loop_filter8(dst, linesize, v->pq);
2050
+                v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq);
2051 2051
         }
2052 2052
         break;
2053 2053
     case TT_4X4:
... ...
@@ -2068,13 +2069,13 @@ static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan
2068 2068
             }
2069 2069
             if(!(subblkpat & (1 << (3 - j))) && !skip_block){
2070 2070
                 if(i==1)
2071
-                    s->dsp.vc1_inv_trans_4x4_dc(dst + (j&1)*4 + (j&2)*2*linesize, linesize, block + off);
2071
+                    v->vc1dsp.vc1_inv_trans_4x4_dc(dst + (j&1)*4 + (j&2)*2*linesize, linesize, block + off);
2072 2072
                 else
2073
-                    s->dsp.vc1_inv_trans_4x4(dst + (j&1)*4 + (j&2)*2*linesize, linesize, block + off);
2073
+                    v->vc1dsp.vc1_inv_trans_4x4(dst + (j&1)*4 + (j&2)*2*linesize, linesize, block + off);
2074 2074
                 if(apply_filter && (j&2 ? pat & (1<<(j-2)) : (cbp_top & (1 << (j + 2)))))
2075
-                    s->dsp.vc1_v_loop_filter4(dst + (j&1)*4 + (j&2)*2*linesize, linesize, v->pq);
2075
+                    v->vc1dsp.vc1_v_loop_filter4(dst + (j&1)*4 + (j&2)*2*linesize, linesize, v->pq);
2076 2076
                 if(apply_filter && (j&1 ? pat & (1<<(j-1)) : (cbp_left & (1 << (j + 1)))))
2077
-                    s->dsp.vc1_h_loop_filter4(dst + (j&1)*4 + (j&2)*2*linesize, linesize, v->pq);
2077
+                    v->vc1dsp.vc1_h_loop_filter4(dst + (j&1)*4 + (j&2)*2*linesize, linesize, v->pq);
2078 2078
             }
2079 2079
         }
2080 2080
         break;
... ...
@@ -2096,13 +2097,13 @@ static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan
2096 2096
             }
2097 2097
             if(!(subblkpat & (1 << (1 - j))) && !skip_block){
2098 2098
                 if(i==1)
2099
-                    s->dsp.vc1_inv_trans_8x4_dc(dst + j*4*linesize, linesize, block + off);
2099
+                    v->vc1dsp.vc1_inv_trans_8x4_dc(dst + j*4*linesize, linesize, block + off);
2100 2100
                 else
2101
-                    s->dsp.vc1_inv_trans_8x4(dst + j*4*linesize, linesize, block + off);
2101
+                    v->vc1dsp.vc1_inv_trans_8x4(dst + j*4*linesize, linesize, block + off);
2102 2102
                 if(apply_filter && j ? pat & 0x3 : (cbp_top & 0xC))
2103
-                    s->dsp.vc1_v_loop_filter8(dst + j*4*linesize, linesize, v->pq);
2103
+                    v->vc1dsp.vc1_v_loop_filter8(dst + j*4*linesize, linesize, v->pq);
2104 2104
                 if(apply_filter && cbp_left & (2 << j))
2105
-                    s->dsp.vc1_h_loop_filter4(dst + j*4*linesize, linesize, v->pq);
2105
+                    v->vc1dsp.vc1_h_loop_filter4(dst + j*4*linesize, linesize, v->pq);
2106 2106
             }
2107 2107
         }
2108 2108
         break;
... ...
@@ -2124,13 +2125,13 @@ static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan
2124 2124
             }
2125 2125
             if(!(subblkpat & (1 << (1 - j))) && !skip_block){
2126 2126
                 if(i==1)
2127
-                    s->dsp.vc1_inv_trans_4x8_dc(dst + j*4, linesize, block + off);
2127
+                    v->vc1dsp.vc1_inv_trans_4x8_dc(dst + j*4, linesize, block + off);
2128 2128
                 else
2129
-                    s->dsp.vc1_inv_trans_4x8(dst + j*4, linesize, block + off);
2129
+                    v->vc1dsp.vc1_inv_trans_4x8(dst + j*4, linesize, block + off);
2130 2130
                 if(apply_filter && cbp_top & (2 << j))
2131
-                    s->dsp.vc1_v_loop_filter4(dst + j*4, linesize, v->pq);
2131
+                    v->vc1dsp.vc1_v_loop_filter4(dst + j*4, linesize, v->pq);
2132 2132
                 if(apply_filter && j ? pat & 0x5 : (cbp_left & 0xA))
2133
-                    s->dsp.vc1_h_loop_filter8(dst + j*4, linesize, v->pq);
2133
+                    v->vc1dsp.vc1_h_loop_filter8(dst + j*4, linesize, v->pq);
2134 2134
             }
2135 2135
         }
2136 2136
         break;
... ...
@@ -2232,14 +2233,14 @@ static int vc1_decode_p_mb(VC1Context *v)
2232 2232
 
2233 2233
                     vc1_decode_intra_block(v, s->block[i], i, val, mquant, (i&4)?v->codingset2:v->codingset);
2234 2234
                     if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue;
2235
-                    s->dsp.vc1_inv_trans_8x8(s->block[i]);
2235
+                    v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
2236 2236
                     if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1;
2237 2237
                     s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
2238 2238
                     if(v->pq >= 9 && v->overlap) {
2239 2239
                         if(v->c_avail)
2240
-                            s->dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
2240
+                            v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
2241 2241
                         if(v->a_avail)
2242
-                            s->dsp.vc1_v_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
2242
+                            v->vc1dsp.vc1_v_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
2243 2243
                     }
2244 2244
                     if(apply_loop_filter && s->mb_x && s->mb_x != (s->mb_width - 1) && s->mb_y && s->mb_y != (s->mb_height - 1)){
2245 2245
                         int left_cbp, top_cbp;
... ...
@@ -2251,9 +2252,9 @@ static int vc1_decode_p_mb(VC1Context *v)
2251 2251
                             top_cbp  = (i & 2) ? (cbp >> ((i-2)*4)) : (v->cbp[s->mb_x - s->mb_stride] >> ((i+2)*4));
2252 2252
                         }
2253 2253
                         if(left_cbp & 0xC)
2254
-                            s->dsp.vc1_v_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2254
+                            v->vc1dsp.vc1_v_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2255 2255
                         if(top_cbp  & 0xA)
2256
-                            s->dsp.vc1_h_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2256
+                            v->vc1dsp.vc1_h_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2257 2257
                     }
2258 2258
                     block_cbp |= 0xF << (i << 2);
2259 2259
                 } else if(val) {
... ...
@@ -2268,9 +2269,9 @@ static int vc1_decode_p_mb(VC1Context *v)
2268 2268
                             top_cbp  = (i & 2) ? (cbp >> ((i-2)*4)) : (v->cbp[s->mb_x - s->mb_stride] >> ((i+2)*4));
2269 2269
                         }
2270 2270
                         if(left_cbp & 0xC)
2271
-                            s->dsp.vc1_v_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2271
+                            v->vc1dsp.vc1_v_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2272 2272
                         if(top_cbp  & 0xA)
2273
-                            s->dsp.vc1_h_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2273
+                            v->vc1dsp.vc1_h_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2274 2274
                     }
2275 2275
                     pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block, s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize, (i&4) && (s->flags & CODEC_FLAG_GRAY), filter, left_cbp, top_cbp);
2276 2276
                     block_cbp |= pat << (i << 2);
... ...
@@ -2363,14 +2364,14 @@ static int vc1_decode_p_mb(VC1Context *v)
2363 2363
 
2364 2364
                     vc1_decode_intra_block(v, s->block[i], i, is_coded[i], mquant, (i&4)?v->codingset2:v->codingset);
2365 2365
                     if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue;
2366
-                    s->dsp.vc1_inv_trans_8x8(s->block[i]);
2366
+                    v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
2367 2367
                     if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1;
2368 2368
                     s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize);
2369 2369
                     if(v->pq >= 9 && v->overlap) {
2370 2370
                         if(v->c_avail)
2371
-                            s->dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
2371
+                            v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
2372 2372
                         if(v->a_avail)
2373
-                            s->dsp.vc1_v_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
2373
+                            v->vc1dsp.vc1_v_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
2374 2374
                     }
2375 2375
                     if(v->s.loop_filter && s->mb_x && s->mb_x != (s->mb_width - 1) && s->mb_y && s->mb_y != (s->mb_height - 1)){
2376 2376
                         int left_cbp, top_cbp;
... ...
@@ -2382,9 +2383,9 @@ static int vc1_decode_p_mb(VC1Context *v)
2382 2382
                             top_cbp  = (i & 2) ? (cbp >> ((i-2)*4)) : (v->cbp[s->mb_x - s->mb_stride] >> ((i+2)*4));
2383 2383
                         }
2384 2384
                         if(left_cbp & 0xC)
2385
-                            s->dsp.vc1_v_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2385
+                            v->vc1dsp.vc1_v_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2386 2386
                         if(top_cbp  & 0xA)
2387
-                            s->dsp.vc1_h_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2387
+                            v->vc1dsp.vc1_h_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2388 2388
                     }
2389 2389
                     block_cbp |= 0xF << (i << 2);
2390 2390
                 } else if(is_coded[i]) {
... ...
@@ -2399,9 +2400,9 @@ static int vc1_decode_p_mb(VC1Context *v)
2399 2399
                             top_cbp  = (i & 2) ? (cbp >> ((i-2)*4)) : (v->cbp[s->mb_x - s->mb_stride] >> ((i+2)*4));
2400 2400
                         }
2401 2401
                         if(left_cbp & 0xC)
2402
-                            s->dsp.vc1_v_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2402
+                            v->vc1dsp.vc1_v_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2403 2403
                         if(top_cbp  & 0xA)
2404
-                            s->dsp.vc1_h_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2404
+                            v->vc1dsp.vc1_h_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2405 2405
                     }
2406 2406
                     pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block, s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize, (i&4) && (s->flags & CODEC_FLAG_GRAY), filter, left_cbp, top_cbp);
2407 2407
                     block_cbp |= pat << (i << 2);
... ...
@@ -2568,7 +2569,7 @@ static void vc1_decode_b_mb(VC1Context *v)
2568 2568
 
2569 2569
             vc1_decode_intra_block(v, s->block[i], i, val, mquant, (i&4)?v->codingset2:v->codingset);
2570 2570
             if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue;
2571
-            s->dsp.vc1_inv_trans_8x8(s->block[i]);
2571
+            v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
2572 2572
             if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1;
2573 2573
             s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
2574 2574
         } else if(val) {
... ...
@@ -2650,7 +2651,7 @@ static void vc1_decode_i_blocks(VC1Context *v)
2650 2650
 
2651 2651
                 vc1_decode_i_block(v, s->block[k], k, val, (k<4)? v->codingset : v->codingset2);
2652 2652
 
2653
-                s->dsp.vc1_inv_trans_8x8(s->block[k]);
2653
+                v->vc1dsp.vc1_inv_trans_8x8(s->block[k]);
2654 2654
                 if(v->pq >= 9 && v->overlap) {
2655 2655
                     for(j = 0; j < 64; j++) s->block[k][j] += 128;
2656 2656
                 }
... ...
@@ -2659,27 +2660,27 @@ static void vc1_decode_i_blocks(VC1Context *v)
2659 2659
             vc1_put_block(v, s->block);
2660 2660
             if(v->pq >= 9 && v->overlap) {
2661 2661
                 if(s->mb_x) {
2662
-                    s->dsp.vc1_h_overlap(s->dest[0], s->linesize);
2663
-                    s->dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize, s->linesize);
2662
+                    v->vc1dsp.vc1_h_overlap(s->dest[0], s->linesize);
2663
+                    v->vc1dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize, s->linesize);
2664 2664
                     if(!(s->flags & CODEC_FLAG_GRAY)) {
2665
-                        s->dsp.vc1_h_overlap(s->dest[1], s->uvlinesize);
2666
-                        s->dsp.vc1_h_overlap(s->dest[2], s->uvlinesize);
2665
+                        v->vc1dsp.vc1_h_overlap(s->dest[1], s->uvlinesize);
2666
+                        v->vc1dsp.vc1_h_overlap(s->dest[2], s->uvlinesize);
2667 2667
                     }
2668 2668
                 }
2669
-                s->dsp.vc1_h_overlap(s->dest[0] + 8, s->linesize);
2670
-                s->dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize);
2669
+                v->vc1dsp.vc1_h_overlap(s->dest[0] + 8, s->linesize);
2670
+                v->vc1dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize);
2671 2671
                 if(!s->first_slice_line) {
2672
-                    s->dsp.vc1_v_overlap(s->dest[0], s->linesize);
2673
-                    s->dsp.vc1_v_overlap(s->dest[0] + 8, s->linesize);
2672
+                    v->vc1dsp.vc1_v_overlap(s->dest[0], s->linesize);
2673
+                    v->vc1dsp.vc1_v_overlap(s->dest[0] + 8, s->linesize);
2674 2674
                     if(!(s->flags & CODEC_FLAG_GRAY)) {
2675
-                        s->dsp.vc1_v_overlap(s->dest[1], s->uvlinesize);
2676
-                        s->dsp.vc1_v_overlap(s->dest[2], s->uvlinesize);
2675
+                        v->vc1dsp.vc1_v_overlap(s->dest[1], s->uvlinesize);
2676
+                        v->vc1dsp.vc1_v_overlap(s->dest[2], s->uvlinesize);
2677 2677
                     }
2678 2678
                 }
2679
-                s->dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize, s->linesize);
2680
-                s->dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize);
2679
+                v->vc1dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize, s->linesize);
2680
+                v->vc1dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize);
2681 2681
             }
2682
-            if(v->s.loop_filter) vc1_loop_filter_iblk(s, v->pq);
2682
+            if(v->s.loop_filter) vc1_loop_filter_iblk(v, v->pq);
2683 2683
 
2684 2684
             if(get_bits_count(&s->gb) > v->bits) {
2685 2685
                 ff_er_add_slice(s, 0, 0, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END));
... ...
@@ -2790,34 +2791,34 @@ static void vc1_decode_i_blocks_adv(VC1Context *v)
2790 2790
 
2791 2791
                 vc1_decode_i_block_adv(v, s->block[k], k, val, (k<4)? v->codingset : v->codingset2, mquant);
2792 2792
 
2793
-                s->dsp.vc1_inv_trans_8x8(s->block[k]);
2793
+                v->vc1dsp.vc1_inv_trans_8x8(s->block[k]);
2794 2794
                 for(j = 0; j < 64; j++) s->block[k][j] += 128;
2795 2795
             }
2796 2796
 
2797 2797
             vc1_put_block(v, s->block);
2798 2798
             if(overlap) {
2799 2799
                 if(s->mb_x) {
2800
-                    s->dsp.vc1_h_overlap(s->dest[0], s->linesize);
2801
-                    s->dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize, s->linesize);
2800
+                    v->vc1dsp.vc1_h_overlap(s->dest[0], s->linesize);
2801
+                    v->vc1dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize, s->linesize);
2802 2802
                     if(!(s->flags & CODEC_FLAG_GRAY)) {
2803
-                        s->dsp.vc1_h_overlap(s->dest[1], s->uvlinesize);
2804
-                        s->dsp.vc1_h_overlap(s->dest[2], s->uvlinesize);
2803
+                        v->vc1dsp.vc1_h_overlap(s->dest[1], s->uvlinesize);
2804
+                        v->vc1dsp.vc1_h_overlap(s->dest[2], s->uvlinesize);
2805 2805
                     }
2806 2806
                 }
2807
-                s->dsp.vc1_h_overlap(s->dest[0] + 8, s->linesize);
2808
-                s->dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize);
2807
+                v->vc1dsp.vc1_h_overlap(s->dest[0] + 8, s->linesize);
2808
+                v->vc1dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize);
2809 2809
                 if(!s->first_slice_line) {
2810
-                    s->dsp.vc1_v_overlap(s->dest[0], s->linesize);
2811
-                    s->dsp.vc1_v_overlap(s->dest[0] + 8, s->linesize);
2810
+                    v->vc1dsp.vc1_v_overlap(s->dest[0], s->linesize);
2811
+                    v->vc1dsp.vc1_v_overlap(s->dest[0] + 8, s->linesize);
2812 2812
                     if(!(s->flags & CODEC_FLAG_GRAY)) {
2813
-                        s->dsp.vc1_v_overlap(s->dest[1], s->uvlinesize);
2814
-                        s->dsp.vc1_v_overlap(s->dest[2], s->uvlinesize);
2813
+                        v->vc1dsp.vc1_v_overlap(s->dest[1], s->uvlinesize);
2814
+                        v->vc1dsp.vc1_v_overlap(s->dest[2], s->uvlinesize);
2815 2815
                     }
2816 2816
                 }
2817
-                s->dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize, s->linesize);
2818
-                s->dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize);
2817
+                v->vc1dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize, s->linesize);
2818
+                v->vc1dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize);
2819 2819
             }
2820
-            if(v->s.loop_filter) vc1_loop_filter_iblk(s, v->pq);
2820
+            if(v->s.loop_filter) vc1_loop_filter_iblk(v, v->pq);
2821 2821
 
2822 2822
             if(get_bits_count(&s->gb) > v->bits) {
2823 2823
                 ff_er_add_slice(s, 0, 0, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END));
... ...
@@ -2929,7 +2930,7 @@ static void vc1_decode_b_blocks(VC1Context *v)
2929 2929
                 av_log(s->avctx, AV_LOG_ERROR, "Bits overconsumption: %i > %i at %ix%i\n", get_bits_count(&s->gb), v->bits,s->mb_x,s->mb_y);
2930 2930
                 return;
2931 2931
             }
2932
-            if(v->s.loop_filter) vc1_loop_filter_iblk(s, v->pq);
2932
+            if(v->s.loop_filter) vc1_loop_filter_iblk(v, v->pq);
2933 2933
         }
2934 2934
         if (!v->s.loop_filter)
2935 2935
             ff_draw_horiz_band(s, s->mb_y * 16, 16);
... ...
@@ -3023,6 +3024,7 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
3023 3023
     if(ff_msmpeg4_decode_init(avctx) < 0)
3024 3024
         return -1;
3025 3025
     if (vc1_init_common(v) < 0) return -1;
3026
+    ff_vc1dsp_init(&v->vc1dsp);
3026 3027
     for (i = 0; i < 64;  i++) {
3027 3028
 #define transpose(x) ((x>>3) | ((x&7)<<3))
3028 3029
         v->zz_8x8[0][i] = transpose(wmv1_scantable[0][i]);
... ...
@@ -25,7 +25,7 @@
25 25
  *
26 26
  */
27 27
 
28
-#include "dsputil.h"
28
+#include "vc1dsp.h"
29 29
 
30 30
 
31 31
 /** Apply overlap transform to horizontal edge
... ...
@@ -612,7 +612,56 @@ PUT_VC1_MSPEL(1, 3)
612 612
 PUT_VC1_MSPEL(2, 3)
613 613
 PUT_VC1_MSPEL(3, 3)
614 614
 
615
-av_cold void ff_vc1dsp_init(DSPContext* dsp, AVCodecContext *avctx) {
615
/**
 * Bilinear chroma motion compensation for an 8-pixel-wide block, "no
 * rounding" VC-1 variant; the interpolated value overwrites dst.
 *
 * Every output pixel is a weighted sum of the 2x2 source neighbourhood
 * anchored at the same column. The four weights always add up to 64; the
 * bias added before the final >> 6 is 32 - 4 = 28, i.e. slightly below the
 * half-way value 32.
 *
 * @param dst    destination (annotated "align 8"); 8 pixels written per row
 * @param src    source (annotated "align 1"); columns 0..8 of rows 0..h are
 *               read, even when the corresponding weight is zero
 * @param stride row pitch, applied to both dst and src
 * @param h      number of rows to produce
 * @param x      horizontal weight in eighths, must be in 0..7
 * @param y      vertical weight in eighths, must be in 0..7
 */
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y)
{
    const int w_top_left  = (8 - x) * (8 - y);
    const int w_top_right = x       * (8 - y);
    const int w_bot_left  = (8 - x) * y;
    const int w_bot_right = x       * y;
    const int bias        = 32 - 4; /* "no rounding" bias */
    int row, col;

    assert(x >= 0 && x < 8 && y >= 0 && y < 8);

    for (row = 0; row < h; row++) {
        /* Columns are processed left to right, one store per column. */
        for (col = 0; col < 8; col++) {
            dst[col] = (w_top_left  * src[col]          +
                        w_top_right * src[col + 1]      +
                        w_bot_left  * src[stride + col] +
                        w_bot_right * src[stride + col + 1] +
                        bias) >> 6;
        }
        dst += stride;
        src += stride;
    }
}
638
+
639
/* Mean of two values with halves rounded up. Both arguments are
 * parenthesized so that operands containing low-precedence operators
 * (|, &, ?:, ...) expand as intended. */
#define avg2(a,b) (((a)+(b)+1)>>1)

/**
 * Bilinear chroma motion compensation for an 8-pixel-wide block, "no
 * rounding" VC-1 variant; the interpolated value is averaged with the pixel
 * already stored in dst (avg2, halves rounded up).
 *
 * The interpolation is a weighted sum of the 2x2 source neighbourhood
 * anchored at the same column. The four weights always add up to 64; the
 * bias added before the >> 6 is 32 - 4 = 28.
 *
 * @param dst    destination (annotated "align 8"); 8 pixels are read and
 *               rewritten per row
 * @param src    source (annotated "align 1"); columns 0..8 of rows 0..h are
 *               read, even when the corresponding weight is zero
 * @param stride row pitch, applied to both dst and src
 * @param h      number of rows to produce
 * @param x      horizontal weight in eighths, must be in 0..7
 * @param y      vertical weight in eighths, must be in 0..7
 */
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
{
    const int A = (8 - x) * (8 - y);
    const int B = (    x) * (8 - y);
    const int C = (8 - x) * (    y);
    const int D = (    x) * (    y);
    int i, j;

    assert(x < 8 && y < 8 && x >= 0 && y >= 0);

    for (i = 0; i < h; i++) {
        for (j = 0; j < 8; j++) {
            const int pred = (A * src[j]          + B * src[j + 1] +
                              C * src[stride + j] + D * src[stride + j + 1] +
                              32 - 4) >> 6;
            dst[j] = avg2(dst[j], pred);
        }
        dst += stride;
        src += stride;
    }
}
663
+
664
+av_cold void ff_vc1dsp_init(VC1DSPContext* dsp) {
616 665
     dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_c;
617 666
     dsp->vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c;
618 667
     dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c;
... ...
@@ -663,4 +712,12 @@ av_cold void ff_vc1dsp_init(DSPContext* dsp, AVCodecContext *avctx) {
663 663
     dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_c;
664 664
     dsp->avg_vc1_mspel_pixels_tab[14] = avg_vc1_mspel_mc23_c;
665 665
     dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_c;
666
+
667
+    dsp->put_no_rnd_vc1_chroma_pixels_tab[0]= put_no_rnd_vc1_chroma_mc8_c;
668
+    dsp->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_no_rnd_vc1_chroma_mc8_c;
669
+
670
+    if (HAVE_ALTIVEC)
671
+        ff_vc1dsp_init_altivec(dsp);
672
+    if (HAVE_MMX)
673
+        ff_vc1dsp_init_mmx(dsp);
666 674
 }
667 675
new file mode 100644
... ...
@@ -0,0 +1,67 @@
0
+/*
1
+ * VC-1 and WMV3 decoder - DSP functions
2
+ * Copyright (c) 2006 Konstantin Shishkov
3
+ *
4
+ * This file is part of FFmpeg.
5
+ *
6
+ * FFmpeg is free software; you can redistribute it and/or
7
+ * modify it under the terms of the GNU Lesser General Public
8
+ * License as published by the Free Software Foundation; either
9
+ * version 2.1 of the License, or (at your option) any later version.
10
+ *
11
+ * FFmpeg is distributed in the hope that it will be useful,
12
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
+ * Lesser General Public License for more details.
15
+ *
16
+ * You should have received a copy of the GNU Lesser General Public
17
+ * License along with FFmpeg; if not, write to the Free Software
18
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
+ */
20
+
21
+/**
22
+ * @file
23
+ * VC-1 and WMV3 decoder - DSP function table (VC1DSPContext) and its init prototypes
24
+ *
25
+ */
26
+
27
+#ifndef AVCODEC_VC1DSP_H
28
+#define AVCODEC_VC1DSP_H
29
+
30
+#include "dsputil.h"
31
+
32
+typedef struct VC1DSPContext {
33
+    /* vc1 functions
+     *
+     * Table of VC-1 specific DSP routines. ff_vc1dsp_init() stores the C
+     * implementations here and then lets ff_vc1dsp_init_altivec() /
+     * ff_vc1dsp_init_mmx() replace entries when HAVE_ALTIVEC / HAVE_MMX
+     * are set. The decoder calls them through v->vc1dsp.
+     */
34
+    void (*vc1_inv_trans_8x8)(DCTELEM *b); /* takes only the coefficient block, no destination */
35
+    void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block);
36
+    void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block);
37
+    void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block);
38
+    void (*vc1_inv_trans_8x8_dc)(uint8_t *dest, int line_size, DCTELEM *block); /* NOTE(review): _dc variants are presumably DC-only shortcuts - confirm in vc1dsp.c */
39
+    void (*vc1_inv_trans_8x4_dc)(uint8_t *dest, int line_size, DCTELEM *block);
40
+    void (*vc1_inv_trans_4x8_dc)(uint8_t *dest, int line_size, DCTELEM *block);
41
+    void (*vc1_inv_trans_4x4_dc)(uint8_t *dest, int line_size, DCTELEM *block);
42
+    void (*vc1_v_overlap)(uint8_t* src, int stride); /* overlap transform; the decoder passes a dest pointer and its linesize */
43
+    void (*vc1_h_overlap)(uint8_t* src, int stride);
44
+    void (*vc1_v_loop_filter4)(uint8_t *src, int stride, int pq); /* loop filters: the decoder passes v->pq as pq */
45
+    void (*vc1_h_loop_filter4)(uint8_t *src, int stride, int pq);
46
+    void (*vc1_v_loop_filter8)(uint8_t *src, int stride, int pq);
47
+    void (*vc1_h_loop_filter8)(uint8_t *src, int stride, int pq);
48
+    void (*vc1_v_loop_filter16)(uint8_t *src, int stride, int pq);
49
+    void (*vc1_h_loop_filter16)(uint8_t *src, int stride, int pq);
50
+
51
+    /* Put/average an 8x8 block with bicubic interpolation and quarterpel
52
+     * precision. The last argument is actually the round value instead of
+     * the height. The function named mcXY is stored at index X + 4*Y
+     * (e.g. mc01 at [4], mc10 at [1], mc33 at [15]).
53
+     */
54
+    op_pixels_func put_vc1_mspel_pixels_tab[16];
55
+    op_pixels_func avg_vc1_mspel_pixels_tab[16];
56
+
57
+    /* This is really one func used in VC-1 decoding: ff_vc1dsp_init() only assigns entry [0] (the 8-wide mc8 routine) of each table. */
58
+    h264_chroma_mc_func put_no_rnd_vc1_chroma_pixels_tab[3];
59
+    h264_chroma_mc_func avg_no_rnd_vc1_chroma_pixels_tab[3];
60
+} VC1DSPContext;
61
+
62
+void ff_vc1dsp_init(VC1DSPContext* c);
63
+void ff_vc1dsp_init_altivec(VC1DSPContext* c);
64
+void ff_vc1dsp_init_mmx(VC1DSPContext* dsp);
65
+
66
+#endif /* AVCODEC_VC1DSP_H */
... ...
@@ -1894,20 +1894,14 @@ PREFETCH(prefetch_3dnow, prefetch)
1894 1894
 
1895 1895
 void ff_put_h264_chroma_mc8_mmx_rnd   (uint8_t *dst, uint8_t *src,
1896 1896
                                        int stride, int h, int x, int y);
1897
-void ff_put_vc1_chroma_mc8_mmx_nornd  (uint8_t *dst, uint8_t *src,
1898
-                                       int stride, int h, int x, int y);
1899 1897
 void ff_put_rv40_chroma_mc8_mmx       (uint8_t *dst, uint8_t *src,
1900 1898
                                        int stride, int h, int x, int y);
1901 1899
 void ff_avg_h264_chroma_mc8_mmx2_rnd  (uint8_t *dst, uint8_t *src,
1902 1900
                                        int stride, int h, int x, int y);
1903
-void ff_avg_vc1_chroma_mc8_mmx2_nornd (uint8_t *dst, uint8_t *src,
1904
-                                       int stride, int h, int x, int y);
1905 1901
 void ff_avg_rv40_chroma_mc8_mmx2      (uint8_t *dst, uint8_t *src,
1906 1902
                                        int stride, int h, int x, int y);
1907 1903
 void ff_avg_h264_chroma_mc8_3dnow_rnd (uint8_t *dst, uint8_t *src,
1908 1904
                                        int stride, int h, int x, int y);
1909
-void ff_avg_vc1_chroma_mc8_3dnow_nornd(uint8_t *dst, uint8_t *src,
1910
-                                       int stride, int h, int x, int y);
1911 1905
 void ff_avg_rv40_chroma_mc8_3dnow     (uint8_t *dst, uint8_t *src,
1912 1906
                                        int stride, int h, int x, int y);
1913 1907
 
... ...
@@ -1931,15 +1925,11 @@ void ff_avg_h264_chroma_mc2_mmx2      (uint8_t *dst, uint8_t *src,
1931 1931
 
1932 1932
 void ff_put_h264_chroma_mc8_ssse3_rnd (uint8_t *dst, uint8_t *src,
1933 1933
                                        int stride, int h, int x, int y);
1934
-void ff_put_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src,
1935
-                                       int stride, int h, int x, int y);
1936 1934
 void ff_put_h264_chroma_mc4_ssse3     (uint8_t *dst, uint8_t *src,
1937 1935
                                        int stride, int h, int x, int y);
1938 1936
 
1939 1937
 void ff_avg_h264_chroma_mc8_ssse3_rnd (uint8_t *dst, uint8_t *src,
1940 1938
                                        int stride, int h, int x, int y);
1941
-void ff_avg_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src,
1942
-                                       int stride, int h, int x, int y);
1943 1939
 void ff_avg_h264_chroma_mc4_ssse3     (uint8_t *dst, uint8_t *src,
1944 1940
                                        int stride, int h, int x, int y);
1945 1941
 
... ...
@@ -2535,7 +2525,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2535 2535
 #if HAVE_YASM
2536 2536
         c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_mmx_rnd;
2537 2537
         c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx;
2538
-        c->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_mmx_nornd;
2539 2538
 
2540 2539
         c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx;
2541 2540
         c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx;
... ...
@@ -2622,8 +2611,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2622 2622
             c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2;
2623 2623
             c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_mmx2;
2624 2624
 
2625
-            c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_mmx2_nornd;
2626
-
2627 2625
             c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd;
2628 2626
             c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2;
2629 2627
             c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_mmx2;
... ...
@@ -2636,9 +2623,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2636 2636
                 c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov;
2637 2637
 #endif
2638 2638
 
2639
-            if (CONFIG_VC1_DECODER)
2640
-                ff_vc1dsp_init_mmx(c, avctx);
2641
-
2642 2639
             c->add_png_paeth_prediction= add_png_paeth_prediction_mmx2;
2643 2640
         } else if (mm_flags & AV_CPU_FLAG_3DNOW) {
2644 2641
             c->prefetch = prefetch_3dnow;
... ...
@@ -2695,8 +2679,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2695 2695
             c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_3dnow_rnd;
2696 2696
             c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow;
2697 2697
 
2698
-            c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_3dnow_nornd;
2699
-
2700 2698
             c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_3dnow;
2701 2699
             c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_3dnow;
2702 2700
 #endif
... ...
@@ -2745,8 +2727,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2745 2745
             H264_QPEL_FUNCS(3, 3, ssse3);
2746 2746
             c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3;
2747 2747
 #if HAVE_YASM
2748
-            c->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_ssse3_nornd;
2749
-            c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_ssse3_nornd;
2750 2748
             c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd;
2751 2749
             c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_ssse3_rnd;
2752 2750
             c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_ssse3;
... ...
@@ -196,7 +196,6 @@ void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
196 196
 void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
197 197
 void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
198 198
 
199
-void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx);
200 199
 void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd);
201 200
 void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd);
202 201
 
... ...
@@ -28,6 +28,7 @@
28 28
 #include "libavutil/x86_cpu.h"
29 29
 #include "libavcodec/dsputil.h"
30 30
 #include "dsputil_mmx.h"
31
+#include "libavcodec/vc1dsp.h"
31 32
 
32 33
 #define OP_PUT(S,D)
33 34
 #define OP_AVG(S,D) "pavgb " #S ", " #D " \n\t"
... ...
@@ -712,30 +713,45 @@ static void vc1_h_loop_filter16_sse4(uint8_t *src, int stride, int pq)
712 712
     ff_vc1_h_loop_filter8_sse4(src,          stride, pq);
713 713
     ff_vc1_h_loop_filter8_sse4(src+8*stride, stride, pq);
714 714
 }
715
+
715 716
 #endif
716 717
 
717
-void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx) {
718
+void ff_put_vc1_chroma_mc8_mmx_nornd  (uint8_t *dst, uint8_t *src,
719
+                                       int stride, int h, int x, int y);
720
+void ff_avg_vc1_chroma_mc8_mmx2_nornd (uint8_t *dst, uint8_t *src,
721
+                                       int stride, int h, int x, int y);
722
+void ff_avg_vc1_chroma_mc8_3dnow_nornd(uint8_t *dst, uint8_t *src,
723
+                                       int stride, int h, int x, int y);
724
+void ff_put_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src,
725
+                                       int stride, int h, int x, int y);
726
+void ff_avg_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src,
727
+                                       int stride, int h, int x, int y);
728
+
729
+void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
730
+{
718 731
     int mm_flags = av_get_cpu_flags();
719 732
 
720
-    dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_mmx;
721
-    dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_mmx;
722
-    dsp->put_vc1_mspel_pixels_tab[ 8] = put_vc1_mspel_mc02_mmx;
723
-    dsp->put_vc1_mspel_pixels_tab[12] = put_vc1_mspel_mc03_mmx;
724
-
725
-    dsp->put_vc1_mspel_pixels_tab[ 1] = put_vc1_mspel_mc10_mmx;
726
-    dsp->put_vc1_mspel_pixels_tab[ 5] = put_vc1_mspel_mc11_mmx;
727
-    dsp->put_vc1_mspel_pixels_tab[ 9] = put_vc1_mspel_mc12_mmx;
728
-    dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_mmx;
729
-
730
-    dsp->put_vc1_mspel_pixels_tab[ 2] = put_vc1_mspel_mc20_mmx;
731
-    dsp->put_vc1_mspel_pixels_tab[ 6] = put_vc1_mspel_mc21_mmx;
732
-    dsp->put_vc1_mspel_pixels_tab[10] = put_vc1_mspel_mc22_mmx;
733
-    dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_mmx;
734
-
735
-    dsp->put_vc1_mspel_pixels_tab[ 3] = put_vc1_mspel_mc30_mmx;
736
-    dsp->put_vc1_mspel_pixels_tab[ 7] = put_vc1_mspel_mc31_mmx;
737
-    dsp->put_vc1_mspel_pixels_tab[11] = put_vc1_mspel_mc32_mmx;
738
-    dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_mmx;
733
+    if (mm_flags & AV_CPU_FLAG_MMX) {
734
+        dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_mmx;
735
+        dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_mmx;
736
+        dsp->put_vc1_mspel_pixels_tab[ 8] = put_vc1_mspel_mc02_mmx;
737
+        dsp->put_vc1_mspel_pixels_tab[12] = put_vc1_mspel_mc03_mmx;
738
+
739
+        dsp->put_vc1_mspel_pixels_tab[ 1] = put_vc1_mspel_mc10_mmx;
740
+        dsp->put_vc1_mspel_pixels_tab[ 5] = put_vc1_mspel_mc11_mmx;
741
+        dsp->put_vc1_mspel_pixels_tab[ 9] = put_vc1_mspel_mc12_mmx;
742
+        dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_mmx;
743
+
744
+        dsp->put_vc1_mspel_pixels_tab[ 2] = put_vc1_mspel_mc20_mmx;
745
+        dsp->put_vc1_mspel_pixels_tab[ 6] = put_vc1_mspel_mc21_mmx;
746
+        dsp->put_vc1_mspel_pixels_tab[10] = put_vc1_mspel_mc22_mmx;
747
+        dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_mmx;
748
+
749
+        dsp->put_vc1_mspel_pixels_tab[ 3] = put_vc1_mspel_mc30_mmx;
750
+        dsp->put_vc1_mspel_pixels_tab[ 7] = put_vc1_mspel_mc31_mmx;
751
+        dsp->put_vc1_mspel_pixels_tab[11] = put_vc1_mspel_mc32_mmx;
752
+        dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_mmx;
753
+    }
739 754
 
740 755
     if (mm_flags & AV_CPU_FLAG_MMX2){
741 756
         dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_mmx2;
... ...
@@ -775,11 +791,16 @@ void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx) {
775 775
 #if HAVE_YASM
776 776
     if (mm_flags & AV_CPU_FLAG_MMX) {
777 777
         ASSIGN_LF(mmx);
778
+        dsp->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_mmx_nornd;
778 779
     }
779 780
     return;
780 781
     if (mm_flags & AV_CPU_FLAG_MMX2) {
781 782
         ASSIGN_LF(mmx2);
783
+        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_mmx2_nornd;
784
+    } else if (mm_flags & AV_CPU_FLAG_3DNOW) {
785
+        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_3dnow_nornd;
782 786
     }
787
+
783 788
     if (mm_flags & AV_CPU_FLAG_SSE2) {
784 789
         dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_sse2;
785 790
         dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse2;
... ...
@@ -788,6 +809,8 @@ void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx) {
788 788
     }
789 789
     if (mm_flags & AV_CPU_FLAG_SSSE3) {
790 790
         ASSIGN_LF(ssse3);
791
+        dsp->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_ssse3_nornd;
792
+        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_ssse3_nornd;
791 793
     }
792 794
     if (mm_flags & AV_CPU_FLAG_SSE4) {
793 795
         dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse4;