Browse code

dsputil: Split off quarterpel bits into their own context

Diego Biurrun authored on 2014/01/08 22:00:10
Showing 30 changed files
... ...
@@ -1551,6 +1551,7 @@ CONFIG_EXTRA="
1551 1551
     mpegvideo
1552 1552
     mpegvideoenc
1553 1553
     nettle
1554
+    qpeldsp
1554 1555
     rangecoder
1555 1556
     riffdec
1556 1557
     riffenc
... ...
@@ -1704,7 +1705,7 @@ rdft_select="fft"
1704 1704
 mpegaudio_select="mpegaudiodsp"
1705 1705
 mpegaudiodsp_select="dct"
1706 1706
 mpegvideo_select="dsputil hpeldsp videodsp"
1707
-mpegvideoenc_select="dsputil mpegvideo"
1707
+mpegvideoenc_select="dsputil mpegvideo qpeldsp"
1708 1708
 
1709 1709
 # decoders / encoders
1710 1710
 aac_decoder_select="mdct sinewin"
... ...
@@ -1730,7 +1731,7 @@ atrac3p_decoder_select="mdct sinewin"
1730 1730
 bink_decoder_select="dsputil hpeldsp"
1731 1731
 binkaudio_dct_decoder_select="mdct rdft dct sinewin"
1732 1732
 binkaudio_rdft_decoder_select="mdct rdft sinewin"
1733
-cavs_decoder_select="dsputil golomb h264chroma videodsp"
1733
+cavs_decoder_select="dsputil golomb h264chroma qpeldsp videodsp"
1734 1734
 cllc_decoder_select="dsputil"
1735 1735
 comfortnoise_encoder_select="lpc"
1736 1736
 cook_decoder_select="dsputil mdct sinewin"
... ...
@@ -1766,7 +1767,7 @@ g2m_decoder_deps="zlib"
1766 1766
 g2m_decoder_select="dsputil"
1767 1767
 h261_decoder_select="error_resilience mpegvideo"
1768 1768
 h261_encoder_select="aandcttables mpegvideoenc"
1769
-h263_decoder_select="error_resilience h263_parser h263dsp mpegvideo"
1769
+h263_decoder_select="error_resilience h263_parser h263dsp mpegvideo qpeldsp"
1770 1770
 h263_encoder_select="aandcttables h263dsp mpegvideoenc"
1771 1771
 h263i_decoder_select="h263_decoder"
1772 1772
 h263p_encoder_select="h263_encoder"
... ...
@@ -1818,7 +1819,7 @@ msmpeg4v2_decoder_select="h263_decoder"
1818 1818
 msmpeg4v2_encoder_select="h263_encoder"
1819 1819
 msmpeg4v3_decoder_select="h263_decoder"
1820 1820
 msmpeg4v3_encoder_select="h263_encoder"
1821
-mss2_decoder_select="error_resilience vc1_decoder"
1821
+mss2_decoder_select="error_resilience qpeldsp vc1_decoder"
1822 1822
 mxpeg_decoder_select="mjpeg_decoder"
1823 1823
 nellymoser_decoder_select="mdct sinewin"
1824 1824
 nellymoser_encoder_select="audio_frame_queue mdct sinewin"
... ...
@@ -1860,7 +1861,7 @@ twinvq_decoder_select="mdct lsp sinewin"
1860 1860
 utvideo_decoder_select="dsputil"
1861 1861
 utvideo_encoder_select="dsputil huffman huffyuvencdsp"
1862 1862
 vble_decoder_select="huffyuvdsp"
1863
-vc1_decoder_select="error_resilience h263_decoder h264chroma h264qpel intrax8"
1863
+vc1_decoder_select="error_resilience h263_decoder h264chroma h264qpel intrax8 qpeldsp"
1864 1864
 vc1image_decoder_select="vc1_decoder"
1865 1865
 vorbis_decoder_select="mdct"
1866 1866
 vorbis_encoder_select="mdct"
... ...
@@ -1936,8 +1937,8 @@ wmv3_vdpau_hwaccel_select="vc1_vdpau_hwaccel"
1936 1936
 
1937 1937
 # parsers
1938 1938
 h264_parser_select="h264_decoder"
1939
-mpeg4video_parser_select="error_resilience h263dsp mpegvideo"
1940 1939
 mpegvideo_parser_select="error_resilience mpegvideo"
1940
+mpeg4video_parser_select="error_resilience h263dsp mpegvideo qpeldsp"
1941 1941
 vc1_parser_select="mpegvideo"
1942 1942
 
1943 1943
 # external libraries
... ...
@@ -63,6 +63,7 @@ OBJS-$(CONFIG_MPEGVIDEO)               += mpegvideo.o mpegvideo_motion.o \
63 63
                                           mpegutils.o
64 64
 OBJS-$(CONFIG_MPEGVIDEOENC)            += mpegvideo_enc.o mpeg12data.o  \
65 65
                                           motion_est.o ratecontrol.o
66
+OBJS-$(CONFIG_QPELDSP)                 += qpeldsp.o
66 67
 OBJS-$(CONFIG_RANGECODER)              += rangecoder.o
67 68
 RDFT-OBJS-$(CONFIG_HARDCODED_TABLES)   += sin_tables.o
68 69
 OBJS-$(CONFIG_RDFT)                    += rdft.o $(RDFT-OBJS-yes)
... ...
@@ -30,6 +30,7 @@
30 30
 #include "golomb.h"
31 31
 #include "h264chroma.h"
32 32
 #include "mathops.h"
33
+#include "qpeldsp.h"
33 34
 #include "cavs.h"
34 35
 
35 36
 static const uint8_t alpha_tab[64] = {
... ...
@@ -23,7 +23,9 @@
23 23
 #define AVCODEC_CAVSDSP_H
24 24
 
25 25
 #include <stdint.h>
26
-#include "dsputil.h"
26
+
27
+#include "avcodec.h"
28
+#include "qpeldsp.h"
27 29
 
28 30
 typedef struct CAVSDSPContext {
29 31
     qpel_mc_func put_cavs_qpel_pixels_tab[2][16];
... ...
@@ -48,9 +48,6 @@ uint32_t ff_square_tab[512] = { 0, };
48 48
 #undef BIT_DEPTH
49 49
 
50 50
 #define BIT_DEPTH 8
51
-#include "hpel_template.c"
52
-#include "tpel_template.c"
53
-#include "dsputil_template.c"
54 51
 #include "dsputilenc_template.c"
55 52
 
56 53
 /* Input permutation for the simple_idct_mmx */
... ...
@@ -485,701 +482,6 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
485 485
     }
486 486
 }
487 487
 
488
-#define QPEL_MC(r, OPNAME, RND, OP)                                           \
489
-static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src,       \
490
-                                            int dstStride, int srcStride,     \
491
-                                            int h)                            \
492
-{                                                                             \
493
-    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;                           \
494
-    int i;                                                                    \
495
-                                                                              \
496
-    for (i = 0; i < h; i++) {                                                 \
497
-        OP(dst[0], (src[0] + src[1]) * 20 - (src[0] + src[2]) * 6 + (src[1] + src[3]) * 3 - (src[2] + src[4])); \
498
-        OP(dst[1], (src[1] + src[2]) * 20 - (src[0] + src[3]) * 6 + (src[0] + src[4]) * 3 - (src[1] + src[5])); \
499
-        OP(dst[2], (src[2] + src[3]) * 20 - (src[1] + src[4]) * 6 + (src[0] + src[5]) * 3 - (src[0] + src[6])); \
500
-        OP(dst[3], (src[3] + src[4]) * 20 - (src[2] + src[5]) * 6 + (src[1] + src[6]) * 3 - (src[0] + src[7])); \
501
-        OP(dst[4], (src[4] + src[5]) * 20 - (src[3] + src[6]) * 6 + (src[2] + src[7]) * 3 - (src[1] + src[8])); \
502
-        OP(dst[5], (src[5] + src[6]) * 20 - (src[4] + src[7]) * 6 + (src[3] + src[8]) * 3 - (src[2] + src[8])); \
503
-        OP(dst[6], (src[6] + src[7]) * 20 - (src[5] + src[8]) * 6 + (src[4] + src[8]) * 3 - (src[3] + src[7])); \
504
-        OP(dst[7], (src[7] + src[8]) * 20 - (src[6] + src[8]) * 6 + (src[5] + src[7]) * 3 - (src[4] + src[6])); \
505
-        dst += dstStride;                                                     \
506
-        src += srcStride;                                                     \
507
-    }                                                                         \
508
-}                                                                             \
509
-                                                                              \
510
-static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src,       \
511
-                                            int dstStride, int srcStride)     \
512
-{                                                                             \
513
-    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;                           \
514
-    const int w = 8;                                                          \
515
-    int i;                                                                    \
516
-                                                                              \
517
-    for (i = 0; i < w; i++) {                                                 \
518
-        const int src0 = src[0 * srcStride];                                  \
519
-        const int src1 = src[1 * srcStride];                                  \
520
-        const int src2 = src[2 * srcStride];                                  \
521
-        const int src3 = src[3 * srcStride];                                  \
522
-        const int src4 = src[4 * srcStride];                                  \
523
-        const int src5 = src[5 * srcStride];                                  \
524
-        const int src6 = src[6 * srcStride];                                  \
525
-        const int src7 = src[7 * srcStride];                                  \
526
-        const int src8 = src[8 * srcStride];                                  \
527
-        OP(dst[0 * dstStride], (src0 + src1) * 20 - (src0 + src2) * 6 + (src1 + src3) * 3 - (src2 + src4)); \
528
-        OP(dst[1 * dstStride], (src1 + src2) * 20 - (src0 + src3) * 6 + (src0 + src4) * 3 - (src1 + src5)); \
529
-        OP(dst[2 * dstStride], (src2 + src3) * 20 - (src1 + src4) * 6 + (src0 + src5) * 3 - (src0 + src6)); \
530
-        OP(dst[3 * dstStride], (src3 + src4) * 20 - (src2 + src5) * 6 + (src1 + src6) * 3 - (src0 + src7)); \
531
-        OP(dst[4 * dstStride], (src4 + src5) * 20 - (src3 + src6) * 6 + (src2 + src7) * 3 - (src1 + src8)); \
532
-        OP(dst[5 * dstStride], (src5 + src6) * 20 - (src4 + src7) * 6 + (src3 + src8) * 3 - (src2 + src8)); \
533
-        OP(dst[6 * dstStride], (src6 + src7) * 20 - (src5 + src8) * 6 + (src4 + src8) * 3 - (src3 + src7)); \
534
-        OP(dst[7 * dstStride], (src7 + src8) * 20 - (src6 + src8) * 6 + (src5 + src7) * 3 - (src4 + src6)); \
535
-        dst++;                                                                \
536
-        src++;                                                                \
537
-    }                                                                         \
538
-}                                                                             \
539
-                                                                              \
540
-static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src,      \
541
-                                             int dstStride, int srcStride,    \
542
-                                             int h)                           \
543
-{                                                                             \
544
-    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;                           \
545
-    int i;                                                                    \
546
-                                                                              \
547
-    for (i = 0; i < h; i++) {                                                 \
548
-        OP(dst[0],  (src[0]  + src[1])  * 20 - (src[0]  + src[2])  * 6 + (src[1]  + src[3])  * 3 - (src[2]  + src[4]));  \
549
-        OP(dst[1],  (src[1]  + src[2])  * 20 - (src[0]  + src[3])  * 6 + (src[0]  + src[4])  * 3 - (src[1]  + src[5]));  \
550
-        OP(dst[2],  (src[2]  + src[3])  * 20 - (src[1]  + src[4])  * 6 + (src[0]  + src[5])  * 3 - (src[0]  + src[6]));  \
551
-        OP(dst[3],  (src[3]  + src[4])  * 20 - (src[2]  + src[5])  * 6 + (src[1]  + src[6])  * 3 - (src[0]  + src[7]));  \
552
-        OP(dst[4],  (src[4]  + src[5])  * 20 - (src[3]  + src[6])  * 6 + (src[2]  + src[7])  * 3 - (src[1]  + src[8]));  \
553
-        OP(dst[5],  (src[5]  + src[6])  * 20 - (src[4]  + src[7])  * 6 + (src[3]  + src[8])  * 3 - (src[2]  + src[9]));  \
554
-        OP(dst[6],  (src[6]  + src[7])  * 20 - (src[5]  + src[8])  * 6 + (src[4]  + src[9])  * 3 - (src[3]  + src[10])); \
555
-        OP(dst[7],  (src[7]  + src[8])  * 20 - (src[6]  + src[9])  * 6 + (src[5]  + src[10]) * 3 - (src[4]  + src[11])); \
556
-        OP(dst[8],  (src[8]  + src[9])  * 20 - (src[7]  + src[10]) * 6 + (src[6]  + src[11]) * 3 - (src[5]  + src[12])); \
557
-        OP(dst[9],  (src[9]  + src[10]) * 20 - (src[8]  + src[11]) * 6 + (src[7]  + src[12]) * 3 - (src[6]  + src[13])); \
558
-        OP(dst[10], (src[10] + src[11]) * 20 - (src[9]  + src[12]) * 6 + (src[8]  + src[13]) * 3 - (src[7]  + src[14])); \
559
-        OP(dst[11], (src[11] + src[12]) * 20 - (src[10] + src[13]) * 6 + (src[9]  + src[14]) * 3 - (src[8]  + src[15])); \
560
-        OP(dst[12], (src[12] + src[13]) * 20 - (src[11] + src[14]) * 6 + (src[10] + src[15]) * 3 - (src[9]  + src[16])); \
561
-        OP(dst[13], (src[13] + src[14]) * 20 - (src[12] + src[15]) * 6 + (src[11] + src[16]) * 3 - (src[10] + src[16])); \
562
-        OP(dst[14], (src[14] + src[15]) * 20 - (src[13] + src[16]) * 6 + (src[12] + src[16]) * 3 - (src[11] + src[15])); \
563
-        OP(dst[15], (src[15] + src[16]) * 20 - (src[14] + src[16]) * 6 + (src[13] + src[15]) * 3 - (src[12] + src[14])); \
564
-        dst += dstStride;                                                     \
565
-        src += srcStride;                                                     \
566
-    }                                                                         \
567
-}                                                                             \
568
-                                                                              \
569
-static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src,      \
570
-                                             int dstStride, int srcStride)    \
571
-{                                                                             \
572
-    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;                           \
573
-    const int w = 16;                                                         \
574
-    int i;                                                                    \
575
-                                                                              \
576
-    for (i = 0; i < w; i++) {                                                 \
577
-        const int src0  = src[0  * srcStride];                                \
578
-        const int src1  = src[1  * srcStride];                                \
579
-        const int src2  = src[2  * srcStride];                                \
580
-        const int src3  = src[3  * srcStride];                                \
581
-        const int src4  = src[4  * srcStride];                                \
582
-        const int src5  = src[5  * srcStride];                                \
583
-        const int src6  = src[6  * srcStride];                                \
584
-        const int src7  = src[7  * srcStride];                                \
585
-        const int src8  = src[8  * srcStride];                                \
586
-        const int src9  = src[9  * srcStride];                                \
587
-        const int src10 = src[10 * srcStride];                                \
588
-        const int src11 = src[11 * srcStride];                                \
589
-        const int src12 = src[12 * srcStride];                                \
590
-        const int src13 = src[13 * srcStride];                                \
591
-        const int src14 = src[14 * srcStride];                                \
592
-        const int src15 = src[15 * srcStride];                                \
593
-        const int src16 = src[16 * srcStride];                                \
594
-        OP(dst[0  * dstStride], (src0  + src1)  * 20 - (src0  + src2)  * 6 + (src1  + src3)  * 3 - (src2  + src4));  \
595
-        OP(dst[1  * dstStride], (src1  + src2)  * 20 - (src0  + src3)  * 6 + (src0  + src4)  * 3 - (src1  + src5));  \
596
-        OP(dst[2  * dstStride], (src2  + src3)  * 20 - (src1  + src4)  * 6 + (src0  + src5)  * 3 - (src0  + src6));  \
597
-        OP(dst[3  * dstStride], (src3  + src4)  * 20 - (src2  + src5)  * 6 + (src1  + src6)  * 3 - (src0  + src7));  \
598
-        OP(dst[4  * dstStride], (src4  + src5)  * 20 - (src3  + src6)  * 6 + (src2  + src7)  * 3 - (src1  + src8));  \
599
-        OP(dst[5  * dstStride], (src5  + src6)  * 20 - (src4  + src7)  * 6 + (src3  + src8)  * 3 - (src2  + src9));  \
600
-        OP(dst[6  * dstStride], (src6  + src7)  * 20 - (src5  + src8)  * 6 + (src4  + src9)  * 3 - (src3  + src10)); \
601
-        OP(dst[7  * dstStride], (src7  + src8)  * 20 - (src6  + src9)  * 6 + (src5  + src10) * 3 - (src4  + src11)); \
602
-        OP(dst[8  * dstStride], (src8  + src9)  * 20 - (src7  + src10) * 6 + (src6  + src11) * 3 - (src5  + src12)); \
603
-        OP(dst[9  * dstStride], (src9  + src10) * 20 - (src8  + src11) * 6 + (src7  + src12) * 3 - (src6  + src13)); \
604
-        OP(dst[10 * dstStride], (src10 + src11) * 20 - (src9  + src12) * 6 + (src8  + src13) * 3 - (src7  + src14)); \
605
-        OP(dst[11 * dstStride], (src11 + src12) * 20 - (src10 + src13) * 6 + (src9  + src14) * 3 - (src8  + src15)); \
606
-        OP(dst[12 * dstStride], (src12 + src13) * 20 - (src11 + src14) * 6 + (src10 + src15) * 3 - (src9  + src16)); \
607
-        OP(dst[13 * dstStride], (src13 + src14) * 20 - (src12 + src15) * 6 + (src11 + src16) * 3 - (src10 + src16)); \
608
-        OP(dst[14 * dstStride], (src14 + src15) * 20 - (src13 + src16) * 6 + (src12 + src16) * 3 - (src11 + src15)); \
609
-        OP(dst[15 * dstStride], (src15 + src16) * 20 - (src14 + src16) * 6 + (src13 + src15) * 3 - (src12 + src14)); \
610
-        dst++;                                                                \
611
-        src++;                                                                \
612
-    }                                                                         \
613
-}                                                                             \
614
-                                                                              \
615
-static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src,                \
616
-                                   ptrdiff_t stride)                          \
617
-{                                                                             \
618
-    uint8_t half[64];                                                         \
619
-                                                                              \
620
-    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);             \
621
-    OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);             \
622
-}                                                                             \
623
-                                                                              \
624
-static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src,                \
625
-                                   ptrdiff_t stride)                          \
626
-{                                                                             \
627
-    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);             \
628
-}                                                                             \
629
-                                                                              \
630
-static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src,                \
631
-                                   ptrdiff_t stride)                          \
632
-{                                                                             \
633
-    uint8_t half[64];                                                         \
634
-                                                                              \
635
-    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);             \
636
-    OPNAME ## pixels8_l2_8(dst, src + 1, half, stride, stride, 8, 8);         \
637
-}                                                                             \
638
-                                                                              \
639
-static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src,                \
640
-                                   ptrdiff_t stride)                          \
641
-{                                                                             \
642
-    uint8_t full[16 * 9];                                                     \
643
-    uint8_t half[64];                                                         \
644
-                                                                              \
645
-    copy_block9(full, src, 16, stride, 9);                                    \
646
-    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);                   \
647
-    OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);                \
648
-}                                                                             \
649
-                                                                              \
650
-static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src,                \
651
-                                   ptrdiff_t stride)                          \
652
-{                                                                             \
653
-    uint8_t full[16 * 9];                                                     \
654
-                                                                              \
655
-    copy_block9(full, src, 16, stride, 9);                                    \
656
-    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);                   \
657
-}                                                                             \
658
-                                                                              \
659
-static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src,                \
660
-                                   ptrdiff_t stride)                          \
661
-{                                                                             \
662
-    uint8_t full[16 * 9];                                                     \
663
-    uint8_t half[64];                                                         \
664
-                                                                              \
665
-    copy_block9(full, src, 16, stride, 9);                                    \
666
-    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);                   \
667
-    OPNAME ## pixels8_l2_8(dst, full + 16, half, stride, 16, 8, 8);           \
668
-}                                                                             \
669
-                                                                              \
670
-void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src,            \
671
-                                       ptrdiff_t stride)                      \
672
-{                                                                             \
673
-    uint8_t full[16 * 9];                                                     \
674
-    uint8_t halfH[72];                                                        \
675
-    uint8_t halfV[64];                                                        \
676
-    uint8_t halfHV[64];                                                       \
677
-                                                                              \
678
-    copy_block9(full, src, 16, stride, 9);                                    \
679
-    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);               \
680
-    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);                  \
681
-    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);                 \
682
-    OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV,                   \
683
-                           stride, 16, 8, 8, 8, 8);                           \
684
-}                                                                             \
685
-                                                                              \
686
-static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src,                \
687
-                                   ptrdiff_t stride)                          \
688
-{                                                                             \
689
-    uint8_t full[16 * 9];                                                     \
690
-    uint8_t halfH[72];                                                        \
691
-    uint8_t halfHV[64];                                                       \
692
-                                                                              \
693
-    copy_block9(full, src, 16, stride, 9);                                    \
694
-    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);               \
695
-    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);              \
696
-    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);                 \
697
-    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);              \
698
-}                                                                             \
699
-                                                                              \
700
-void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src,            \
701
-                                       ptrdiff_t stride)                      \
702
-{                                                                             \
703
-    uint8_t full[16 * 9];                                                     \
704
-    uint8_t halfH[72];                                                        \
705
-    uint8_t halfV[64];                                                        \
706
-    uint8_t halfHV[64];                                                       \
707
-                                                                              \
708
-    copy_block9(full, src, 16, stride, 9);                                    \
709
-    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);               \
710
-    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16);              \
711
-    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);                 \
712
-    OPNAME ## pixels8_l4_8(dst, full + 1, halfH, halfV, halfHV,               \
713
-                           stride, 16, 8, 8, 8, 8);                           \
714
-}                                                                             \
715
-                                                                              \
716
-static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src,                \
717
-                                   ptrdiff_t stride)                          \
718
-{                                                                             \
719
-    uint8_t full[16 * 9];                                                     \
720
-    uint8_t halfH[72];                                                        \
721
-    uint8_t halfHV[64];                                                       \
722
-                                                                              \
723
-    copy_block9(full, src, 16, stride, 9);                                    \
724
-    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);               \
725
-    put ## RND ## pixels8_l2_8(halfH, halfH, full + 1, 8, 8, 16, 9);          \
726
-    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);                 \
727
-    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);              \
728
-}                                                                             \
729
-                                                                              \
730
-void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src,            \
731
-                                       ptrdiff_t stride)                      \
732
-{                                                                             \
733
-    uint8_t full[16 * 9];                                                     \
734
-    uint8_t halfH[72];                                                        \
735
-    uint8_t halfV[64];                                                        \
736
-    uint8_t halfHV[64];                                                       \
737
-                                                                              \
738
-    copy_block9(full, src, 16, stride, 9);                                    \
739
-    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);               \
740
-    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);                  \
741
-    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);                 \
742
-    OPNAME ## pixels8_l4_8(dst, full + 16, halfH + 8, halfV, halfHV,          \
743
-                           stride, 16, 8, 8, 8, 8);                           \
744
-}                                                                             \
745
-                                                                              \
746
-static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src,                \
747
-                                   ptrdiff_t stride)                          \
748
-{                                                                             \
749
-    uint8_t full[16 * 9];                                                     \
750
-    uint8_t halfH[72];                                                        \
751
-    uint8_t halfHV[64];                                                       \
752
-                                                                              \
753
-    copy_block9(full, src, 16, stride, 9);                                    \
754
-    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);               \
755
-    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);              \
756
-    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);                 \
757
-    OPNAME ## pixels8_l2_8(dst, halfH + 8, halfHV, stride, 8, 8, 8);          \
758
-}                                                                             \
759
-                                                                              \
760
-void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src,            \
761
-                                       ptrdiff_t stride)                      \
762
-{                                                                             \
763
-    uint8_t full[16 * 9];                                                     \
764
-    uint8_t halfH[72];                                                        \
765
-    uint8_t halfV[64];                                                        \
766
-    uint8_t halfHV[64];                                                       \
767
-                                                                              \
768
-    copy_block9(full, src, 16, stride, 9);                                    \
769
-    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);               \
770
-    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16);              \
771
-    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);                 \
772
-    OPNAME ## pixels8_l4_8(dst, full + 17, halfH + 8, halfV, halfHV,          \
773
-                           stride, 16, 8, 8, 8, 8);                           \
774
-}                                                                             \
775
-                                                                              \
776
-static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src,                \
777
-                                   ptrdiff_t stride)                          \
778
-{                                                                             \
779
-    uint8_t full[16 * 9];                                                     \
780
-    uint8_t halfH[72];                                                        \
781
-    uint8_t halfHV[64];                                                       \
782
-                                                                              \
783
-    copy_block9(full, src, 16, stride, 9);                                    \
784
-    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);               \
785
-    put ## RND ## pixels8_l2_8(halfH, halfH, full + 1, 8, 8, 16, 9);          \
786
-    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);                 \
787
-    OPNAME ## pixels8_l2_8(dst, halfH + 8, halfHV, stride, 8, 8, 8);          \
788
-}                                                                             \
789
-                                                                              \
790
-static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src,                \
791
-                                   ptrdiff_t stride)                          \
792
-{                                                                             \
793
-    uint8_t halfH[72];                                                        \
794
-    uint8_t halfHV[64];                                                       \
795
-                                                                              \
796
-    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);            \
797
-    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);                 \
798
-    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);              \
799
-}                                                                             \
800
-                                                                              \
801
-static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src,                \
802
-                                   ptrdiff_t stride)                          \
803
-{                                                                             \
804
-    uint8_t halfH[72];                                                        \
805
-    uint8_t halfHV[64];                                                       \
806
-                                                                              \
807
-    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);            \
808
-    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);                 \
809
-    OPNAME ## pixels8_l2_8(dst, halfH + 8, halfHV, stride, 8, 8, 8);          \
810
-}                                                                             \
811
-                                                                              \
812
-void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src,            \
813
-                                       ptrdiff_t stride)                      \
814
-{                                                                             \
815
-    uint8_t full[16 * 9];                                                     \
816
-    uint8_t halfH[72];                                                        \
817
-    uint8_t halfV[64];                                                        \
818
-    uint8_t halfHV[64];                                                       \
819
-                                                                              \
820
-    copy_block9(full, src, 16, stride, 9);                                    \
821
-    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);               \
822
-    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);                  \
823
-    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);                 \
824
-    OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);              \
825
-}                                                                             \
826
-                                                                              \
827
-static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src,                \
828
-                                   ptrdiff_t stride)                          \
829
-{                                                                             \
830
-    uint8_t full[16 * 9];                                                     \
831
-    uint8_t halfH[72];                                                        \
832
-                                                                              \
833
-    copy_block9(full, src, 16, stride, 9);                                    \
834
-    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);               \
835
-    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);              \
836
-    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);                   \
837
-}                                                                             \
838
-                                                                              \
839
-void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src,            \
840
-                                       ptrdiff_t stride)                      \
841
-{                                                                             \
842
-    uint8_t full[16 * 9];                                                     \
843
-    uint8_t halfH[72];                                                        \
844
-    uint8_t halfV[64];                                                        \
845
-    uint8_t halfHV[64];                                                       \
846
-                                                                              \
847
-    copy_block9(full, src, 16, stride, 9);                                    \
848
-    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);               \
849
-    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16);              \
850
-    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);                 \
851
-    OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);              \
852
-}                                                                             \
853
-                                                                              \
854
-static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src,                \
855
-                                   ptrdiff_t stride)                          \
856
-{                                                                             \
857
-    uint8_t full[16 * 9];                                                     \
858
-    uint8_t halfH[72];                                                        \
859
-                                                                              \
860
-    copy_block9(full, src, 16, stride, 9);                                    \
861
-    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);               \
862
-    put ## RND ## pixels8_l2_8(halfH, halfH, full + 1, 8, 8, 16, 9);          \
863
-    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);                   \
864
-}                                                                             \
865
-                                                                              \
866
-static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src,                \
867
-                                   ptrdiff_t stride)                          \
868
-{                                                                             \
869
-    uint8_t halfH[72];                                                        \
870
-                                                                              \
871
-    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);            \
872
-    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);                   \
873
-}                                                                             \
874
-                                                                              \
875
-static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src,               \
876
-                                    ptrdiff_t stride)                         \
877
-{                                                                             \
878
-    uint8_t half[256];                                                        \
879
-                                                                              \
880
-    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);          \
881
-    OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);          \
882
-}                                                                             \
883
-                                                                              \
884
-static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src,               \
885
-                                    ptrdiff_t stride)                         \
886
-{                                                                             \
887
-    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);           \
888
-}                                                                             \
889
-                                                                              \
890
-static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src,               \
891
-                                    ptrdiff_t stride)                         \
892
-{                                                                             \
893
-    uint8_t half[256];                                                        \
894
-                                                                              \
895
-    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);          \
896
-    OPNAME ## pixels16_l2_8(dst, src + 1, half, stride, stride, 16, 16);      \
897
-}                                                                             \
898
-                                                                              \
899
-static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src,               \
900
-                                    ptrdiff_t stride)                         \
901
-{                                                                             \
902
-    uint8_t full[24 * 17];                                                    \
903
-    uint8_t half[256];                                                        \
904
-                                                                              \
905
-    copy_block17(full, src, 24, stride, 17);                                  \
906
-    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);                 \
907
-    OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);             \
908
-}                                                                             \
909
-                                                                              \
910
-static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src,               \
911
-                                    ptrdiff_t stride)                         \
912
-{                                                                             \
913
-    uint8_t full[24 * 17];                                                    \
914
-                                                                              \
915
-    copy_block17(full, src, 24, stride, 17);                                  \
916
-    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);                  \
917
-}                                                                             \
918
-                                                                              \
919
-static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src,               \
920
-                                    ptrdiff_t stride)                         \
921
-{                                                                             \
922
-    uint8_t full[24 * 17];                                                    \
923
-    uint8_t half[256];                                                        \
924
-                                                                              \
925
-    copy_block17(full, src, 24, stride, 17);                                  \
926
-    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);                 \
927
-    OPNAME ## pixels16_l2_8(dst, full + 24, half, stride, 24, 16, 16);        \
928
-}                                                                             \
929
-                                                                              \
930
-void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src,           \
931
-                                        ptrdiff_t stride)                     \
932
-{                                                                             \
933
-    uint8_t full[24 * 17];                                                    \
934
-    uint8_t halfH[272];                                                       \
935
-    uint8_t halfV[256];                                                       \
936
-    uint8_t halfHV[256];                                                      \
937
-                                                                              \
938
-    copy_block17(full, src, 24, stride, 17);                                  \
939
-    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);            \
940
-    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);                \
941
-    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);              \
942
-    OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV,                  \
943
-                            stride, 24, 16, 16, 16, 16);                      \
944
-}                                                                             \
945
-                                                                              \
946
-static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src,               \
947
-                                    ptrdiff_t stride)                         \
948
-{                                                                             \
949
-    uint8_t full[24 * 17];                                                    \
950
-    uint8_t halfH[272];                                                       \
951
-    uint8_t halfHV[256];                                                      \
952
-                                                                              \
953
-    copy_block17(full, src, 24, stride, 17);                                  \
954
-    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);            \
955
-    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);          \
956
-    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);              \
957
-    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);          \
958
-}                                                                             \
959
-                                                                              \
960
-void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src,           \
961
-                                        ptrdiff_t stride)                     \
962
-{                                                                             \
963
-    uint8_t full[24 * 17];                                                    \
964
-    uint8_t halfH[272];                                                       \
965
-    uint8_t halfV[256];                                                       \
966
-    uint8_t halfHV[256];                                                      \
967
-                                                                              \
968
-    copy_block17(full, src, 24, stride, 17);                                  \
969
-    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);            \
970
-    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24);            \
971
-    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);              \
972
-    OPNAME ## pixels16_l4_8(dst, full + 1, halfH, halfV, halfHV,              \
973
-                            stride, 24, 16, 16, 16, 16);                      \
974
-}                                                                             \
975
-                                                                              \
976
-static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src,               \
977
-                                    ptrdiff_t stride)                         \
978
-{                                                                             \
979
-    uint8_t full[24 * 17];                                                    \
980
-    uint8_t halfH[272];                                                       \
981
-    uint8_t halfHV[256];                                                      \
982
-                                                                              \
983
-    copy_block17(full, src, 24, stride, 17);                                  \
984
-    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);            \
985
-    put ## RND ## pixels16_l2_8(halfH, halfH, full + 1, 16, 16, 24, 17);      \
986
-    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);              \
987
-    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);          \
988
-}                                                                             \
989
-                                                                              \
990
-void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src,           \
991
-                                        ptrdiff_t stride)                     \
992
-{                                                                             \
993
-    uint8_t full[24 * 17];                                                    \
994
-    uint8_t halfH[272];                                                       \
995
-    uint8_t halfV[256];                                                       \
996
-    uint8_t halfHV[256];                                                      \
997
-                                                                              \
998
-    copy_block17(full, src, 24, stride, 17);                                  \
999
-    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);            \
1000
-    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);                \
1001
-    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);              \
1002
-    OPNAME ## pixels16_l4_8(dst, full + 24, halfH + 16, halfV, halfHV,        \
1003
-                            stride, 24, 16, 16, 16, 16);                      \
1004
-}                                                                             \
1005
-                                                                              \
1006
-static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src,               \
1007
-                                    ptrdiff_t stride)                         \
1008
-{                                                                             \
1009
-    uint8_t full[24 * 17];                                                    \
1010
-    uint8_t halfH[272];                                                       \
1011
-    uint8_t halfHV[256];                                                      \
1012
-                                                                              \
1013
-    copy_block17(full, src, 24, stride, 17);                                  \
1014
-    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);            \
1015
-    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);          \
1016
-    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);              \
1017
-    OPNAME ## pixels16_l2_8(dst, halfH + 16, halfHV, stride, 16, 16, 16);     \
1018
-}                                                                             \
1019
-                                                                              \
1020
-void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src,           \
1021
-                                        ptrdiff_t stride)                     \
1022
-{                                                                             \
1023
-    uint8_t full[24 * 17];                                                    \
1024
-    uint8_t halfH[272];                                                       \
1025
-    uint8_t halfV[256];                                                       \
1026
-    uint8_t halfHV[256];                                                      \
1027
-                                                                              \
1028
-    copy_block17(full, src, 24, stride, 17);                                  \
1029
-    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);            \
1030
-    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24);            \
1031
-    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);              \
1032
-    OPNAME ## pixels16_l4_8(dst, full + 25, halfH + 16, halfV, halfHV,        \
1033
-                            stride, 24, 16, 16, 16, 16);                      \
1034
-}                                                                             \
1035
-                                                                              \
1036
-static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src,               \
1037
-                                    ptrdiff_t stride)                         \
1038
-{                                                                             \
1039
-    uint8_t full[24 * 17];                                                    \
1040
-    uint8_t halfH[272];                                                       \
1041
-    uint8_t halfHV[256];                                                      \
1042
-                                                                              \
1043
-    copy_block17(full, src, 24, stride, 17);                                  \
1044
-    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);            \
1045
-    put ## RND ## pixels16_l2_8(halfH, halfH, full + 1, 16, 16, 24, 17);      \
1046
-    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);              \
1047
-    OPNAME ## pixels16_l2_8(dst, halfH + 16, halfHV, stride, 16, 16, 16);     \
1048
-}                                                                             \
1049
-                                                                              \
1050
-static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src,               \
1051
-                                    ptrdiff_t stride)                         \
1052
-{                                                                             \
1053
-    uint8_t halfH[272];                                                       \
1054
-    uint8_t halfHV[256];                                                      \
1055
-                                                                              \
1056
-    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);         \
1057
-    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);              \
1058
-    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);          \
1059
-}                                                                             \
1060
-                                                                              \
1061
-static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src,               \
1062
-                                    ptrdiff_t stride)                         \
1063
-{                                                                             \
1064
-    uint8_t halfH[272];                                                       \
1065
-    uint8_t halfHV[256];                                                      \
1066
-                                                                              \
1067
-    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);         \
1068
-    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);              \
1069
-    OPNAME ## pixels16_l2_8(dst, halfH + 16, halfHV, stride, 16, 16, 16);     \
1070
-}                                                                             \
1071
-                                                                              \
1072
-void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src,           \
1073
-                                        ptrdiff_t stride)                     \
1074
-{                                                                             \
1075
-    uint8_t full[24 * 17];                                                    \
1076
-    uint8_t halfH[272];                                                       \
1077
-    uint8_t halfV[256];                                                       \
1078
-    uint8_t halfHV[256];                                                      \
1079
-                                                                              \
1080
-    copy_block17(full, src, 24, stride, 17);                                  \
1081
-    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);            \
1082
-    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);                \
1083
-    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);              \
1084
-    OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);          \
1085
-}                                                                             \
1086
-                                                                              \
1087
-static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src,               \
1088
-                                    ptrdiff_t stride)                         \
1089
-{                                                                             \
1090
-    uint8_t full[24 * 17];                                                    \
1091
-    uint8_t halfH[272];                                                       \
1092
-                                                                              \
1093
-    copy_block17(full, src, 24, stride, 17);                                  \
1094
-    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);            \
1095
-    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);          \
1096
-    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);                 \
1097
-}                                                                             \
1098
-                                                                              \
1099
-void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src,           \
1100
-                                        ptrdiff_t stride)                     \
1101
-{                                                                             \
1102
-    uint8_t full[24 * 17];                                                    \
1103
-    uint8_t halfH[272];                                                       \
1104
-    uint8_t halfV[256];                                                       \
1105
-    uint8_t halfHV[256];                                                      \
1106
-                                                                              \
1107
-    copy_block17(full, src, 24, stride, 17);                                  \
1108
-    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);            \
1109
-    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24);            \
1110
-    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);              \
1111
-    OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);          \
1112
-}                                                                             \
1113
-                                                                              \
1114
-static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src,               \
1115
-                                    ptrdiff_t stride)                         \
1116
-{                                                                             \
1117
-    uint8_t full[24 * 17];                                                    \
1118
-    uint8_t halfH[272];                                                       \
1119
-                                                                              \
1120
-    copy_block17(full, src, 24, stride, 17);                                  \
1121
-    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);            \
1122
-    put ## RND ## pixels16_l2_8(halfH, halfH, full + 1, 16, 16, 24, 17);      \
1123
-    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);                 \
1124
-}                                                                             \
1125
-                                                                              \
1126
-static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src,               \
1127
-                                    ptrdiff_t stride)                         \
1128
-{                                                                             \
1129
-    uint8_t halfH[272];                                                       \
1130
-                                                                              \
1131
-    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);         \
1132
-    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);                 \
1133
-}
1134
-
1135
-#define op_avg(a, b)        a = (((a) + cm[((b) + 16) >> 5] + 1) >> 1)
1136
-#define op_avg_no_rnd(a, b) a = (((a) + cm[((b) + 15) >> 5])     >> 1)
1137
-#define op_put(a, b)        a = cm[((b) + 16) >> 5]
1138
-#define op_put_no_rnd(a, b) a = cm[((b) + 15) >> 5]
1139
-
1140
-QPEL_MC(0, put_, _, op_put)
1141
-QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1142
-QPEL_MC(0, avg_, _, op_avg)
1143
-
1144
-#undef op_avg
1145
-#undef op_put
1146
-#undef op_put_no_rnd
1147
-
1148
-void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
1149
-{
1150
-    put_pixels8_8_c(dst, src, stride, 8);
1151
-}
1152
-
1153
-void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
1154
-{
1155
-    avg_pixels8_8_c(dst, src, stride, 8);
1156
-}
1157
-
1158
-void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
1159
-{
1160
-    put_pixels16_8_c(dst, src, stride, 16);
1161
-}
1162
-
1163
-void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
1164
-{
1165
-    avg_pixels16_8_c(dst, src, stride, 16);
1166
-}
1167
-
1168
-#define put_qpel8_mc00_c         ff_put_pixels8x8_c
1169
-#define avg_qpel8_mc00_c         ff_avg_pixels8x8_c
1170
-#define put_qpel16_mc00_c        ff_put_pixels16x16_c
1171
-#define avg_qpel16_mc00_c        ff_avg_pixels16x16_c
1172
-#define put_no_rnd_qpel8_mc00_c  ff_put_pixels8x8_c
1173
-#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
1174
-
1175
-void ff_put_pixels8_l2_8(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
1176
-                         int dst_stride, int src_stride1, int src_stride2,
1177
-                         int h)
1178
-{
1179
-    put_pixels8_l2_8(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);
1180
-
1181
-}
1182
-
1183 488
 static inline int pix_abs16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
1184 489
                               int line_size, int h)
1185 490
 {
... ...
@@ -2198,35 +1500,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
2198 2198
     c->pix_abs[1][2] = pix_abs8_y2_c;
2199 2199
     c->pix_abs[1][3] = pix_abs8_xy2_c;
2200 2200
 
2201
-#define dspfunc(PFX, IDX, NUM)                              \
2202
-    c->PFX ## _pixels_tab[IDX][0]  = PFX ## NUM ## _mc00_c; \
2203
-    c->PFX ## _pixels_tab[IDX][1]  = PFX ## NUM ## _mc10_c; \
2204
-    c->PFX ## _pixels_tab[IDX][2]  = PFX ## NUM ## _mc20_c; \
2205
-    c->PFX ## _pixels_tab[IDX][3]  = PFX ## NUM ## _mc30_c; \
2206
-    c->PFX ## _pixels_tab[IDX][4]  = PFX ## NUM ## _mc01_c; \
2207
-    c->PFX ## _pixels_tab[IDX][5]  = PFX ## NUM ## _mc11_c; \
2208
-    c->PFX ## _pixels_tab[IDX][6]  = PFX ## NUM ## _mc21_c; \
2209
-    c->PFX ## _pixels_tab[IDX][7]  = PFX ## NUM ## _mc31_c; \
2210
-    c->PFX ## _pixels_tab[IDX][8]  = PFX ## NUM ## _mc02_c; \
2211
-    c->PFX ## _pixels_tab[IDX][9]  = PFX ## NUM ## _mc12_c; \
2212
-    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
2213
-    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
2214
-    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
2215
-    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
2216
-    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
2217
-    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
2218
-
2219
-    dspfunc(put_qpel, 0, 16);
2220
-    dspfunc(put_qpel, 1, 8);
2221
-
2222
-    dspfunc(put_no_rnd_qpel, 0, 16);
2223
-    dspfunc(put_no_rnd_qpel, 1, 8);
2224
-
2225
-    dspfunc(avg_qpel, 0, 16);
2226
-    dspfunc(avg_qpel, 1, 8);
2227
-
2228
-#undef dspfunc
2229
-
2230 2201
 #define SET_CMP_FUNC(name)                      \
2231 2202
     c->name[0] = name ## 16_c;                  \
2232 2203
     c->name[1] = name ## 8x8_c;
... ...
@@ -34,15 +34,6 @@
34 34
 
35 35
 extern uint32_t ff_square_tab[512];
36 36
 
37
-void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
38
-void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
39
-void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
40
-void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
41
-
42
-void ff_put_pixels8_l2_8(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
43
-                         int dst_stride, int src_stride1, int src_stride2,
44
-                         int h);
45
-
46 37
 void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
47 38
               int dxx, int dxy, int dyx, int dyy, int shift, int r,
48 39
               int width, int height);
... ...
@@ -64,33 +55,9 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
64 64
  * Block sizes for op_pixels_func are 8x4,8x8 16x8 16x16.
65 65
  * h for op_pixels_func is limited to { width / 2, width },
66 66
  * but never larger than 16 and never smaller than 4. */
67
-typedef void (*qpel_mc_func)(uint8_t *dst /* align width (8 or 16) */,
68
-                             uint8_t *src /* align 1 */, ptrdiff_t stride);
69
-
70 67
 typedef void (*op_fill_func)(uint8_t *block /* align width (8 or 16) */,
71 68
                              uint8_t value, int line_size, int h);
72 69
 
73
-#define DEF_OLD_QPEL(name)                                                     \
74
-    void ff_put_        ## name(uint8_t *dst /* align width (8 or 16) */,      \
75
-                                uint8_t *src /* align 1 */, ptrdiff_t stride); \
76
-    void ff_put_no_rnd_ ## name(uint8_t *dst /* align width (8 or 16) */,      \
77
-                                uint8_t *src /* align 1 */, ptrdiff_t stride); \
78
-    void ff_avg_        ## name(uint8_t *dst /* align width (8 or 16) */,      \
79
-                                uint8_t *src /* align 1 */, ptrdiff_t stride);
80
-
81
-DEF_OLD_QPEL(qpel16_mc11_old_c)
82
-DEF_OLD_QPEL(qpel16_mc31_old_c)
83
-DEF_OLD_QPEL(qpel16_mc12_old_c)
84
-DEF_OLD_QPEL(qpel16_mc32_old_c)
85
-DEF_OLD_QPEL(qpel16_mc13_old_c)
86
-DEF_OLD_QPEL(qpel16_mc33_old_c)
87
-DEF_OLD_QPEL(qpel8_mc11_old_c)
88
-DEF_OLD_QPEL(qpel8_mc31_old_c)
89
-DEF_OLD_QPEL(qpel8_mc12_old_c)
90
-DEF_OLD_QPEL(qpel8_mc32_old_c)
91
-DEF_OLD_QPEL(qpel8_mc13_old_c)
92
-DEF_OLD_QPEL(qpel8_mc33_old_c)
93
-
94 70
 struct MpegEncContext;
95 71
 /* Motion estimation:
96 72
  * h is limited to { width / 2, width, 2 * width },
... ...
@@ -174,10 +141,6 @@ typedef struct DSPContext {
174 174
     me_cmp_func ildct_cmp[6]; // only width 16 used
175 175
     me_cmp_func frame_skip_cmp[6]; // only width 8 used
176 176
 
177
-    qpel_mc_func put_qpel_pixels_tab[2][16];
178
-    qpel_mc_func avg_qpel_pixels_tab[2][16];
179
-    qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
180
-
181 177
     me_cmp_func pix_abs[2][4];
182 178
 
183 179
     void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w);
184 180
deleted file mode 100644
... ...
@@ -1,223 +0,0 @@
1
-/*
2
- * DSP utils
3
- * Copyright (c) 2000, 2001 Fabrice Bellard
4
- * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5
- *
6
- * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
7
- *
8
- * This file is part of Libav.
9
- *
10
- * Libav is free software; you can redistribute it and/or
11
- * modify it under the terms of the GNU Lesser General Public
12
- * License as published by the Free Software Foundation; either
13
- * version 2.1 of the License, or (at your option) any later version.
14
- *
15
- * Libav is distributed in the hope that it will be useful,
16
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18
- * Lesser General Public License for more details.
19
- *
20
- * You should have received a copy of the GNU Lesser General Public
21
- * License along with Libav; if not, write to the Free Software
22
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23
- */
24
-
25
-/**
26
- * @file
27
- * DSP utils
28
- */
29
-
30
-#define PIXOP2(OPNAME, OP)                                              \
31
-static inline void OPNAME ## _no_rnd_pixels8_l2_8(uint8_t *dst,         \
32
-                                                  const uint8_t *src1,  \
33
-                                                  const uint8_t *src2,  \
34
-                                                  int dst_stride,       \
35
-                                                  int src_stride1,      \
36
-                                                  int src_stride2,      \
37
-                                                  int h)                \
38
-{                                                                       \
39
-    int i;                                                              \
40
-                                                                        \
41
-    for (i = 0; i < h; i++) {                                           \
42
-        uint32_t a, b;                                                  \
43
-        a = AV_RN32(&src1[i * src_stride1]);                            \
44
-        b = AV_RN32(&src2[i * src_stride2]);                            \
45
-        OP(*((uint32_t *) &dst[i * dst_stride]),                        \
46
-           no_rnd_avg32(a, b));                                         \
47
-        a = AV_RN32(&src1[i * src_stride1 + 4]);                        \
48
-        b = AV_RN32(&src2[i * src_stride2 + 4]);                        \
49
-        OP(*((uint32_t *) &dst[i * dst_stride + 4]),                    \
50
-           no_rnd_avg32(a, b));                                         \
51
-    }                                                                   \
52
-}                                                                       \
53
-                                                                        \
54
-static inline void OPNAME ## _no_rnd_pixels16_l2_8(uint8_t *dst,        \
55
-                                                   const uint8_t *src1, \
56
-                                                   const uint8_t *src2, \
57
-                                                   int dst_stride,      \
58
-                                                   int src_stride1,     \
59
-                                                   int src_stride2,     \
60
-                                                   int h)               \
61
-{                                                                       \
62
-    OPNAME ## _no_rnd_pixels8_l2_8(dst, src1, src2, dst_stride,         \
63
-                                   src_stride1, src_stride2, h);        \
64
-    OPNAME ## _no_rnd_pixels8_l2_8(dst  + 8,                            \
65
-                                   src1 + 8,                            \
66
-                                   src2 + 8,                            \
67
-                                   dst_stride, src_stride1,             \
68
-                                   src_stride2, h);                     \
69
-}                                                                       \
70
-                                                                        \
71
-static inline void OPNAME ## _pixels8_l4_8(uint8_t *dst,                \
72
-                                           const uint8_t *src1,         \
73
-                                           const uint8_t *src2,         \
74
-                                           const uint8_t *src3,         \
75
-                                           const uint8_t *src4,         \
76
-                                           int dst_stride,              \
77
-                                           int src_stride1,             \
78
-                                           int src_stride2,             \
79
-                                           int src_stride3,             \
80
-                                           int src_stride4,             \
81
-                                           int h)                       \
82
-{                                                                       \
83
-    /* FIXME HIGH BIT DEPTH */                                          \
84
-    int i;                                                              \
85
-                                                                        \
86
-    for (i = 0; i < h; i++) {                                           \
87
-        uint32_t a, b, c, d, l0, l1, h0, h1;                            \
88
-        a  = AV_RN32(&src1[i * src_stride1]);                           \
89
-        b  = AV_RN32(&src2[i * src_stride2]);                           \
90
-        c  = AV_RN32(&src3[i * src_stride3]);                           \
91
-        d  = AV_RN32(&src4[i * src_stride4]);                           \
92
-        l0 = (a & 0x03030303UL) +                                       \
93
-             (b & 0x03030303UL) +                                       \
94
-                  0x02020202UL;                                         \
95
-        h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
96
-             ((b & 0xFCFCFCFCUL) >> 2);                                 \
97
-        l1 = (c & 0x03030303UL) +                                       \
98
-             (d & 0x03030303UL);                                        \
99
-        h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
100
-             ((d & 0xFCFCFCFCUL) >> 2);                                 \
101
-        OP(*((uint32_t *) &dst[i * dst_stride]),                        \
102
-           h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
103
-        a  = AV_RN32(&src1[i * src_stride1 + 4]);                       \
104
-        b  = AV_RN32(&src2[i * src_stride2 + 4]);                       \
105
-        c  = AV_RN32(&src3[i * src_stride3 + 4]);                       \
106
-        d  = AV_RN32(&src4[i * src_stride4 + 4]);                       \
107
-        l0 = (a & 0x03030303UL) +                                       \
108
-             (b & 0x03030303UL) +                                       \
109
-                  0x02020202UL;                                         \
110
-        h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
111
-             ((b & 0xFCFCFCFCUL) >> 2);                                 \
112
-        l1 = (c & 0x03030303UL) +                                       \
113
-             (d & 0x03030303UL);                                        \
114
-        h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
115
-             ((d & 0xFCFCFCFCUL) >> 2);                                 \
116
-        OP(*((uint32_t *) &dst[i * dst_stride + 4]),                    \
117
-           h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
118
-    }                                                                   \
119
-}                                                                       \
120
-                                                                        \
121
-static inline void OPNAME ## _no_rnd_pixels8_l4_8(uint8_t *dst,         \
122
-                                                  const uint8_t *src1,  \
123
-                                                  const uint8_t *src2,  \
124
-                                                  const uint8_t *src3,  \
125
-                                                  const uint8_t *src4,  \
126
-                                                  int dst_stride,       \
127
-                                                  int src_stride1,      \
128
-                                                  int src_stride2,      \
129
-                                                  int src_stride3,      \
130
-                                                  int src_stride4,      \
131
-                                                  int h)                \
132
-{                                                                       \
133
-    /* FIXME HIGH BIT DEPTH */                                          \
134
-    int i;                                                              \
135
-                                                                        \
136
-    for (i = 0; i < h; i++) {                                           \
137
-        uint32_t a, b, c, d, l0, l1, h0, h1;                            \
138
-        a  = AV_RN32(&src1[i * src_stride1]);                           \
139
-        b  = AV_RN32(&src2[i * src_stride2]);                           \
140
-        c  = AV_RN32(&src3[i * src_stride3]);                           \
141
-        d  = AV_RN32(&src4[i * src_stride4]);                           \
142
-        l0 = (a & 0x03030303UL) +                                       \
143
-             (b & 0x03030303UL) +                                       \
144
-                  0x01010101UL;                                         \
145
-        h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
146
-             ((b & 0xFCFCFCFCUL) >> 2);                                 \
147
-        l1 = (c & 0x03030303UL) +                                       \
148
-             (d & 0x03030303UL);                                        \
149
-        h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
150
-             ((d & 0xFCFCFCFCUL) >> 2);                                 \
151
-        OP(*((uint32_t *) &dst[i * dst_stride]),                        \
152
-           h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
153
-        a  = AV_RN32(&src1[i * src_stride1 + 4]);                       \
154
-        b  = AV_RN32(&src2[i * src_stride2 + 4]);                       \
155
-        c  = AV_RN32(&src3[i * src_stride3 + 4]);                       \
156
-        d  = AV_RN32(&src4[i * src_stride4 + 4]);                       \
157
-        l0 = (a & 0x03030303UL) +                                       \
158
-             (b & 0x03030303UL) +                                       \
159
-                  0x01010101UL;                                         \
160
-        h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
161
-             ((b & 0xFCFCFCFCUL) >> 2);                                 \
162
-        l1 = (c & 0x03030303UL) +                                       \
163
-             (d & 0x03030303UL);                                        \
164
-        h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
165
-             ((d & 0xFCFCFCFCUL) >> 2);                                 \
166
-        OP(*((uint32_t *) &dst[i * dst_stride + 4]),                    \
167
-           h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
168
-    }                                                                   \
169
-}                                                                       \
170
-                                                                        \
171
-static inline void OPNAME ## _pixels16_l4_8(uint8_t *dst,               \
172
-                                            const uint8_t *src1,        \
173
-                                            const uint8_t *src2,        \
174
-                                            const uint8_t *src3,        \
175
-                                            const uint8_t *src4,        \
176
-                                            int dst_stride,             \
177
-                                            int src_stride1,            \
178
-                                            int src_stride2,            \
179
-                                            int src_stride3,            \
180
-                                            int src_stride4,            \
181
-                                            int h)                      \
182
-{                                                                       \
183
-    OPNAME ## _pixels8_l4_8(dst, src1, src2, src3, src4, dst_stride,    \
184
-                            src_stride1, src_stride2, src_stride3,      \
185
-                            src_stride4, h);                            \
186
-    OPNAME ## _pixels8_l4_8(dst  + 8,                                   \
187
-                            src1 + 8, src2 + 8,                         \
188
-                            src3 + 8, src4 + 8,                         \
189
-                            dst_stride, src_stride1, src_stride2,       \
190
-                            src_stride3, src_stride4, h);               \
191
-}                                                                       \
192
-                                                                        \
193
-static inline void OPNAME ## _no_rnd_pixels16_l4_8(uint8_t *dst,        \
194
-                                                   const uint8_t *src1, \
195
-                                                   const uint8_t *src2, \
196
-                                                   const uint8_t *src3, \
197
-                                                   const uint8_t *src4, \
198
-                                                   int dst_stride,      \
199
-                                                   int src_stride1,     \
200
-                                                   int src_stride2,     \
201
-                                                   int src_stride3,     \
202
-                                                   int src_stride4,     \
203
-                                                   int h)               \
204
-{                                                                       \
205
-    OPNAME ## _no_rnd_pixels8_l4_8(dst, src1, src2, src3, src4,         \
206
-                                   dst_stride, src_stride1,             \
207
-                                   src_stride2, src_stride3,            \
208
-                                   src_stride4, h);                     \
209
-    OPNAME ## _no_rnd_pixels8_l4_8(dst  + 8,                            \
210
-                                   src1 + 8, src2 + 8,                  \
211
-                                   src3 + 8, src4 + 8,                  \
212
-                                   dst_stride, src_stride1,             \
213
-                                   src_stride2, src_stride3,            \
214
-                                   src_stride4, h);                     \
215
-}                                                                       \
216
-
217
-#define op_avg(a, b) a = rnd_avg32(a, b)
218
-#define op_put(a, b) a = b
219
-#define put_no_rnd_pixels8_8_c put_pixels8_8_c
220
-PIXOP2(avg, op_avg)
221
-PIXOP2(put, op_put)
222
-#undef op_avg
223
-#undef op_put
... ...
@@ -36,6 +36,7 @@
36 36
 #include "mpeg4video_parser.h"
37 37
 #include "mpegvideo.h"
38 38
 #include "msmpeg4.h"
39
+#include "qpeldsp.h"
39 40
 #include "thread.h"
40 41
 
41 42
 av_cold int ff_h263_decode_init(AVCodecContext *avctx)
... ...
@@ -116,6 +117,7 @@ av_cold int ff_h263_decode_init(AVCodecContext *avctx)
116 116
             return ret;
117 117
 
118 118
     ff_h263dsp_init(&s->h263dsp);
119
+    ff_qpeldsp_init(&s->qdsp);
119 120
     ff_h263_decode_init_vlc();
120 121
 
121 122
     return 0;
... ...
@@ -461,9 +463,9 @@ int ff_h263_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
461 461
     avctx->has_b_frames = !s->low_delay;
462 462
 
463 463
 #define SET_QPEL_FUNC(postfix1, postfix2)                           \
464
-    s->dsp.put_        ## postfix1 = ff_put_        ## postfix2;    \
465
-    s->dsp.put_no_rnd_ ## postfix1 = ff_put_no_rnd_ ## postfix2;    \
466
-    s->dsp.avg_        ## postfix1 = ff_avg_        ## postfix2;
464
+    s->qdsp.put_        ## postfix1 = ff_put_        ## postfix2;   \
465
+    s->qdsp.put_no_rnd_ ## postfix1 = ff_put_no_rnd_ ## postfix2;   \
466
+    s->qdsp.avg_        ## postfix1 = ff_avg_        ## postfix2;
467 467
 
468 468
     if (s->workaround_bugs & FF_BUG_STD_QPEL) {
469 469
         SET_QPEL_FUNC(qpel_pixels_tab[0][5], qpel16_mc11_old_c)
... ...
@@ -527,11 +529,11 @@ int ff_h263_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
527 527
     }
528 528
 
529 529
     if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
530
-        s->me.qpel_put = s->dsp.put_qpel_pixels_tab;
531
-        s->me.qpel_avg = s->dsp.avg_qpel_pixels_tab;
530
+        s->me.qpel_put = s->qdsp.put_qpel_pixels_tab;
531
+        s->me.qpel_avg = s->qdsp.avg_qpel_pixels_tab;
532 532
     } else {
533
-        s->me.qpel_put = s->dsp.put_no_rnd_qpel_pixels_tab;
534
-        s->me.qpel_avg = s->dsp.avg_qpel_pixels_tab;
533
+        s->me.qpel_put = s->qdsp.put_no_rnd_qpel_pixels_tab;
534
+        s->me.qpel_avg = s->qdsp.avg_qpel_pixels_tab;
535 535
     }
536 536
 
537 537
     if ((ret = ff_MPV_frame_start(s, avctx)) < 0)
... ...
@@ -39,6 +39,7 @@
39 39
 #include "h264qpel.h"
40 40
 #include "mpegutils.h"
41 41
 #include "parser.h"
42
+#include "qpeldsp.h"
42 43
 #include "rectangle.h"
43 44
 #include "videodsp.h"
44 45
 
... ...
@@ -32,6 +32,7 @@
32 32
 #include "libavutil/intreadwrite.h"
33 33
 #include "avcodec.h"
34 34
 #include "h264.h"
35
+#include "qpeldsp.h"
35 36
 #include "svq3.h"
36 37
 #include "thread.h"
37 38
 
... ...
@@ -22,7 +22,7 @@
22 22
 #ifndef AVCODEC_H264QPEL_H
23 23
 #define AVCODEC_H264QPEL_H
24 24
 
25
-#include "dsputil.h"
25
+#include "qpeldsp.h"
26 26
 
27 27
 typedef struct H264QpelContext {
28 28
     qpel_mc_func put_h264_qpel_pixels_tab[4][16];
... ...
@@ -329,9 +329,11 @@ int ff_init_me(MpegEncContext *s){
329 329
 /*FIXME s->no_rounding b_type*/
330 330
     if(s->flags&CODEC_FLAG_QPEL){
331 331
         c->sub_motion_search= qpel_motion_search;
332
-        c->qpel_avg= s->dsp.avg_qpel_pixels_tab;
333
-        if(s->no_rounding) c->qpel_put= s->dsp.put_no_rnd_qpel_pixels_tab;
334
-        else               c->qpel_put= s->dsp.put_qpel_pixels_tab;
332
+        c->qpel_avg = s->qdsp.avg_qpel_pixels_tab;
333
+        if (s->no_rounding)
334
+            c->qpel_put = s->qdsp.put_no_rnd_qpel_pixels_tab;
335
+        else
336
+            c->qpel_put = s->qdsp.put_qpel_pixels_tab;
335 337
     }else{
336 338
         if(c->avctx->me_sub_cmp&FF_CMP_CHROMA)
337 339
             c->sub_motion_search= hpel_motion_search;
... ...
@@ -622,9 +624,9 @@ static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
622 622
                 dxy = ((my4 & 3) << 2) | (mx4 & 3);
623 623
 
624 624
                 if(s->no_rounding)
625
-                    s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y   , ref    , stride);
625
+                    s->qdsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y, ref, stride);
626 626
                 else
627
-                    s->dsp.put_qpel_pixels_tab       [1][dxy](dest_y   , ref    , stride);
627
+                    s->qdsp.put_qpel_pixels_tab[1][dxy](dest_y, ref, stride);
628 628
             }else{
629 629
                 uint8_t *ref= c->ref[block][0] + (mx4>>1) + (my4>>1)*stride;
630 630
                 dxy = ((my4 & 1) << 1) | (mx4 & 1);
... ...
@@ -1208,14 +1210,14 @@ static inline int check_bidir_mv(MpegEncContext * s,
1208 1208
         src_y = motion_fy >> 2;
1209 1209
 
1210 1210
         ptr = ref_data[0] + (src_y * stride) + src_x;
1211
-        s->dsp.put_qpel_pixels_tab[0][dxy](dest_y    , ptr    , stride);
1211
+        s->qdsp.put_qpel_pixels_tab[0][dxy](dest_y, ptr, stride);
1212 1212
 
1213 1213
         dxy = ((motion_by & 3) << 2) | (motion_bx & 3);
1214 1214
         src_x = motion_bx >> 2;
1215 1215
         src_y = motion_by >> 2;
1216 1216
 
1217 1217
         ptr = ref2_data[0] + (src_y * stride) + src_x;
1218
-        s->dsp.avg_qpel_pixels_tab[size][dxy](dest_y    , ptr    , stride);
1218
+        s->qdsp.avg_qpel_pixels_tab[size][dxy](dest_y, ptr, stride);
1219 1219
     }else{
1220 1220
         dxy = ((motion_fy & 1) << 1) | (motion_fx & 1);
1221 1221
         src_x = motion_fx >> 1;
... ...
@@ -40,6 +40,7 @@
40 40
 #include "mpegvideo.h"
41 41
 #include "mjpegenc.h"
42 42
 #include "msmpeg4.h"
43
+#include "qpeldsp.h"
43 44
 #include "xvmc_internal.h"
44 45
 #include "thread.h"
45 46
 #include <limits.h>
... ...
@@ -38,6 +38,7 @@
38 38
 #include "ratecontrol.h"
39 39
 #include "parser.h"
40 40
 #include "mpeg12data.h"
41
+#include "qpeldsp.h"
41 42
 #include "rl.h"
42 43
 #include "thread.h"
43 44
 #include "videodsp.h"
... ...
@@ -348,6 +349,7 @@ typedef struct MpegEncContext {
348 348
 
349 349
     DSPContext dsp;             ///< pointers for accelerated dsp functions
350 350
     HpelDSPContext hdsp;
351
+    QpelDSPContext qdsp;
351 352
     VideoDSPContext vdsp;
352 353
     H263DSPContext h263dsp;
353 354
     int f_code;                 ///< forward MV resolution
... ...
@@ -46,6 +46,7 @@
46 46
 #include "mpegutils.h"
47 47
 #include "mjpegenc.h"
48 48
 #include "msmpeg4.h"
49
+#include "qpeldsp.h"
49 50
 #include "faandct.h"
50 51
 #include "thread.h"
51 52
 #include "aandcttab.h"
... ...
@@ -687,6 +688,8 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
687 687
     if (ARCH_X86)
688 688
         ff_MPV_encode_init_x86(s);
689 689
 
690
+    ff_qpeldsp_init(&s->qdsp);
691
+
690 692
     s->avctx->coded_frame = s->current_picture.f;
691 693
 
692 694
     if (s->msmpeg4_version) {
... ...
@@ -1944,10 +1947,10 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
1944 1944
 
1945 1945
         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1946 1946
             op_pix  = s->hdsp.put_pixels_tab;
1947
-            op_qpix = s->dsp.put_qpel_pixels_tab;
1947
+            op_qpix = s->qdsp.put_qpel_pixels_tab;
1948 1948
         } else {
1949 1949
             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
1950
-            op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1950
+            op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
1951 1951
         }
1952 1952
 
1953 1953
         if (s->mv_dir & MV_DIR_FORWARD) {
... ...
@@ -1955,7 +1958,7 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
1955 1955
                           s->last_picture.f->data,
1956 1956
                           op_pix, op_qpix);
1957 1957
             op_pix  = s->hdsp.avg_pixels_tab;
1958
-            op_qpix = s->dsp.avg_qpel_pixels_tab;
1958
+            op_qpix = s->qdsp.avg_qpel_pixels_tab;
1959 1959
         }
1960 1960
         if (s->mv_dir & MV_DIR_BACKWARD) {
1961 1961
             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
... ...
@@ -31,6 +31,7 @@
31 31
 #include "mpegvideo.h"
32 32
 #include "mjpegenc.h"
33 33
 #include "msmpeg4.h"
34
+#include "qpeldsp.h"
34 35
 #include <limits.h>
35 36
 
36 37
 static void gmc1_motion(MpegEncContext *s,
... ...
@@ -27,6 +27,7 @@
27 27
 #include "error_resilience.h"
28 28
 #include "internal.h"
29 29
 #include "msmpeg4data.h"
30
+#include "qpeldsp.h"
30 31
 #include "vc1.h"
31 32
 #include "mss12.h"
32 33
 #include "mss2dsp.h"
... ...
@@ -37,6 +38,7 @@ typedef struct MSS2Context {
37 37
     AVFrame       *last_pic;
38 38
     MSS12Context   c;
39 39
     MSS2DSPContext dsp;
40
+    QpelDSPContext qdsp;
40 41
     SliceContext   sc[2];
41 42
 } MSS2Context;
42 43
 
... ...
@@ -787,8 +789,8 @@ static av_cold int wmv9_init(AVCodecContext *avctx)
787 787
         return ret;
788 788
 
789 789
     /* error concealment */
790
-    v->s.me.qpel_put = v->s.dsp.put_qpel_pixels_tab;
791
-    v->s.me.qpel_avg = v->s.dsp.avg_qpel_pixels_tab;
790
+    v->s.me.qpel_put = v->s.qdsp.put_qpel_pixels_tab;
791
+    v->s.me.qpel_avg = v->s.qdsp.avg_qpel_pixels_tab;
792 792
 
793 793
     return 0;
794 794
 }
... ...
@@ -827,6 +829,7 @@ static av_cold int mss2_decode_init(AVCodecContext *avctx)
827 827
         return ret;
828 828
     }
829 829
     ff_mss2dsp_init(&ctx->dsp);
830
+    ff_qpeldsp_init(&ctx->qdsp);
830 831
 
831 832
     avctx->pix_fmt = c->free_colours == 127 ? AV_PIX_FMT_RGB555
832 833
                                             : AV_PIX_FMT_RGB24;
833 834
new file mode 100644
... ...
@@ -0,0 +1,219 @@
0
+/*
1
+ * quarterpel DSP function templates
2
+ *
3
+ * This file is part of Libav.
4
+ *
5
+ * Libav is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * Libav is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with Libav; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+/**
21
+ * @file
22
+ * quarterpel DSP function templates
23
+ */
24
+
25
+#define PIXOP2(OPNAME, OP) /* OPNAME-prefixed 2- and 4-source blends */ \
26
+static inline void OPNAME ## _no_rnd_pixels8_l2_8(uint8_t *dst,         \
27
+                                                  const uint8_t *src1,  \
28
+                                                  const uint8_t *src2,  \
29
+                                                  int dst_stride,       \
30
+                                                  int src_stride1,      \
31
+                                                  int src_stride2,      \
32
+                                                  int h)                \
33
+{ /* per row: OP(dst, no_rnd_avg32(src1, src2)), 4 bytes at a time */   \
34
+    int i;                                                              \
35
+    /* h rows, 8 bytes each; every source/dest has its own stride */    \
36
+    for (i = 0; i < h; i++) {                                           \
37
+        uint32_t a, b;                                                  \
38
+        a = AV_RN32(&src1[i * src_stride1]); /* bytes 0..3 */           \
39
+        b = AV_RN32(&src2[i * src_stride2]);                            \
40
+        OP(*((uint32_t *) &dst[i * dst_stride]),                        \
41
+           no_rnd_avg32(a, b));                                         \
42
+        a = AV_RN32(&src1[i * src_stride1 + 4]); /* bytes 4..7 */       \
43
+        b = AV_RN32(&src2[i * src_stride2 + 4]);                        \
44
+        OP(*((uint32_t *) &dst[i * dst_stride + 4]),                    \
45
+           no_rnd_avg32(a, b));                                         \
46
+    }                                                                   \
47
+}                                                                       \
48
+/* 16-wide variant: two 8-wide calls, second one shifted by 8 bytes */  \
49
+static inline void OPNAME ## _no_rnd_pixels16_l2_8(uint8_t *dst,        \
50
+                                                   const uint8_t *src1, \
51
+                                                   const uint8_t *src2, \
52
+                                                   int dst_stride,      \
53
+                                                   int src_stride1,     \
54
+                                                   int src_stride2,     \
55
+                                                   int h)               \
56
+{                                                                       \
57
+    OPNAME ## _no_rnd_pixels8_l2_8(dst, src1, src2, dst_stride,         \
58
+                                   src_stride1, src_stride2, h);        \
59
+    OPNAME ## _no_rnd_pixels8_l2_8(dst  + 8,                            \
60
+                                   src1 + 8,                            \
61
+                                   src2 + 8,                            \
62
+                                   dst_stride, src_stride1,             \
63
+                                   src_stride2, h);                     \
64
+}                                                                       \
65
+/* per-byte (a + b + c + d + 2) >> 2 of four sources, 8 bytes a row */  \
66
+static inline void OPNAME ## _pixels8_l4_8(uint8_t *dst,                \
67
+                                           const uint8_t *src1,         \
68
+                                           const uint8_t *src2,         \
69
+                                           const uint8_t *src3,         \
70
+                                           const uint8_t *src4,         \
71
+                                           int dst_stride,              \
72
+                                           int src_stride1,             \
73
+                                           int src_stride2,             \
74
+                                           int src_stride3,             \
75
+                                           int src_stride4,             \
76
+                                           int h)                       \
77
+{                                                                       \
78
+    /* FIXME HIGH BIT DEPTH */                                          \
79
+    int i;                                                              \
80
+    /* low 2 / high 6 bits summed apart so bytes never carry over */    \
81
+    for (i = 0; i < h; i++) {                                           \
82
+        uint32_t a, b, c, d, l0, l1, h0, h1;                            \
83
+        a  = AV_RN32(&src1[i * src_stride1]);                           \
84
+        b  = AV_RN32(&src2[i * src_stride2]);                           \
85
+        c  = AV_RN32(&src3[i * src_stride3]);                           \
86
+        d  = AV_RN32(&src4[i * src_stride4]);                           \
87
+        l0 = (a & 0x03030303UL) +                                       \
88
+             (b & 0x03030303UL) +                                       \
89
+                  0x02020202UL; /* +2 in each byte = rounding bias */   \
90
+        h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
91
+             ((b & 0xFCFCFCFCUL) >> 2);                                 \
92
+        l1 = (c & 0x03030303UL) +                                       \
93
+             (d & 0x03030303UL);                                        \
94
+        h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
95
+             ((d & 0xFCFCFCFCUL) >> 2);                                 \
96
+        OP(*((uint32_t *) &dst[i * dst_stride]),                        \
97
+           h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL)); /* mask: drop bits shifted in from the byte above */ \
98
+        a  = AV_RN32(&src1[i * src_stride1 + 4]); /* bytes 4..7 */      \
99
+        b  = AV_RN32(&src2[i * src_stride2 + 4]);                       \
100
+        c  = AV_RN32(&src3[i * src_stride3 + 4]);                       \
101
+        d  = AV_RN32(&src4[i * src_stride4 + 4]);                       \
102
+        l0 = (a & 0x03030303UL) +                                       \
103
+             (b & 0x03030303UL) +                                       \
104
+                  0x02020202UL; /* +2 in each byte = rounding bias */   \
105
+        h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
106
+             ((b & 0xFCFCFCFCUL) >> 2);                                 \
107
+        l1 = (c & 0x03030303UL) +                                       \
108
+             (d & 0x03030303UL);                                        \
109
+        h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
110
+             ((d & 0xFCFCFCFCUL) >> 2);                                 \
111
+        OP(*((uint32_t *) &dst[i * dst_stride + 4]),                    \
112
+           h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
113
+    }                                                                   \
114
+}                                                                       \
115
+/* no_rnd form: bias 1 instead of 2, i.e. (a + b + c + d + 1) >> 2 */   \
116
+static inline void OPNAME ## _no_rnd_pixels8_l4_8(uint8_t *dst,         \
117
+                                                  const uint8_t *src1,  \
118
+                                                  const uint8_t *src2,  \
119
+                                                  const uint8_t *src3,  \
120
+                                                  const uint8_t *src4,  \
121
+                                                  int dst_stride,       \
122
+                                                  int src_stride1,      \
123
+                                                  int src_stride2,      \
124
+                                                  int src_stride3,      \
125
+                                                  int src_stride4,      \
126
+                                                  int h)                \
127
+{                                                                       \
128
+    /* FIXME HIGH BIT DEPTH */                                          \
129
+    int i;                                                              \
130
+                                                                        \
131
+    for (i = 0; i < h; i++) {                                           \
132
+        uint32_t a, b, c, d, l0, l1, h0, h1;                            \
133
+        a  = AV_RN32(&src1[i * src_stride1]);                           \
134
+        b  = AV_RN32(&src2[i * src_stride2]);                           \
135
+        c  = AV_RN32(&src3[i * src_stride3]);                           \
136
+        d  = AV_RN32(&src4[i * src_stride4]);                           \
137
+        l0 = (a & 0x03030303UL) +                                       \
138
+             (b & 0x03030303UL) +                                       \
139
+                  0x01010101UL; /* +1 in each byte (no_rnd bias) */     \
140
+        h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
141
+             ((b & 0xFCFCFCFCUL) >> 2);                                 \
142
+        l1 = (c & 0x03030303UL) +                                       \
143
+             (d & 0x03030303UL);                                        \
144
+        h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
145
+             ((d & 0xFCFCFCFCUL) >> 2);                                 \
146
+        OP(*((uint32_t *) &dst[i * dst_stride]),                        \
147
+           h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
148
+        a  = AV_RN32(&src1[i * src_stride1 + 4]); /* bytes 4..7 */      \
149
+        b  = AV_RN32(&src2[i * src_stride2 + 4]);                       \
150
+        c  = AV_RN32(&src3[i * src_stride3 + 4]);                       \
151
+        d  = AV_RN32(&src4[i * src_stride4 + 4]);                       \
152
+        l0 = (a & 0x03030303UL) +                                       \
153
+             (b & 0x03030303UL) +                                       \
154
+                  0x01010101UL; /* +1 in each byte (no_rnd bias) */     \
155
+        h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
156
+             ((b & 0xFCFCFCFCUL) >> 2);                                 \
157
+        l1 = (c & 0x03030303UL) +                                       \
158
+             (d & 0x03030303UL);                                        \
159
+        h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
160
+             ((d & 0xFCFCFCFCUL) >> 2);                                 \
161
+        OP(*((uint32_t *) &dst[i * dst_stride + 4]),                    \
162
+           h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
163
+    }                                                                   \
164
+}                                                                       \
165
+/* 16-wide rounded 4-source blend: two 8-wide calls, 8 bytes apart */   \
166
+static inline void OPNAME ## _pixels16_l4_8(uint8_t *dst,               \
167
+                                            const uint8_t *src1,        \
168
+                                            const uint8_t *src2,        \
169
+                                            const uint8_t *src3,        \
170
+                                            const uint8_t *src4,        \
171
+                                            int dst_stride,             \
172
+                                            int src_stride1,            \
173
+                                            int src_stride2,            \
174
+                                            int src_stride3,            \
175
+                                            int src_stride4,            \
176
+                                            int h)                      \
177
+{                                                                       \
178
+    OPNAME ## _pixels8_l4_8(dst, src1, src2, src3, src4, dst_stride,    \
179
+                            src_stride1, src_stride2, src_stride3,      \
180
+                            src_stride4, h);                            \
181
+    OPNAME ## _pixels8_l4_8(dst  + 8,                                   \
182
+                            src1 + 8, src2 + 8,                         \
183
+                            src3 + 8, src4 + 8,                         \
184
+                            dst_stride, src_stride1, src_stride2,       \
185
+                            src_stride3, src_stride4, h);               \
186
+}                                                                       \
187
+/* 16-wide no_rnd 4-source blend, again as two 8-wide halves */         \
188
+static inline void OPNAME ## _no_rnd_pixels16_l4_8(uint8_t *dst,        \
189
+                                                   const uint8_t *src1, \
190
+                                                   const uint8_t *src2, \
191
+                                                   const uint8_t *src3, \
192
+                                                   const uint8_t *src4, \
193
+                                                   int dst_stride,      \
194
+                                                   int src_stride1,     \
195
+                                                   int src_stride2,     \
196
+                                                   int src_stride3,     \
197
+                                                   int src_stride4,     \
198
+                                                   int h)               \
199
+{                                                                       \
200
+    OPNAME ## _no_rnd_pixels8_l4_8(dst, src1, src2, src3, src4,         \
201
+                                   dst_stride, src_stride1,             \
202
+                                   src_stride2, src_stride3,            \
203
+                                   src_stride4, h);                     \
204
+    OPNAME ## _no_rnd_pixels8_l4_8(dst  + 8,                            \
205
+                                   src1 + 8, src2 + 8,                  \
206
+                                   src3 + 8, src4 + 8,                  \
207
+                                   dst_stride, src_stride1,             \
208
+                                   src_stride2, src_stride3,            \
209
+                                   src_stride4, h);                     \
210
+}                                                                       \
211
+
212
+#define op_avg(a, b) a = rnd_avg32(a, b) /* fold b into destination a via rnd_avg32 */
213
+#define op_put(a, b) a = b /* plain store of b into destination a */
214
+#define put_no_rnd_pixels8_8_c put_pixels8_8_c /* alias: the no_rnd put name resolves to the regular put */
215
+PIXOP2(avg, op_avg) /* emits the avg_* blend helpers */
216
+PIXOP2(put, op_put) /* emits the put_* blend helpers */
217
+#undef op_avg /* OP helpers are only needed for the two expansions above */
218
+#undef op_put
0 219
new file mode 100644
... ...
@@ -0,0 +1,764 @@
0
+/*
1
+ * quarterpel DSP functions
2
+ *
3
+ * This file is part of Libav.
4
+ *
5
+ * Libav is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * Libav is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with Libav; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+/**
21
+ * @file
22
+ * quarterpel DSP functions
23
+ */
24
+
25
+#include <stddef.h>
26
+#include <stdint.h>
27
+
28
+#include "config.h"
29
+#include "libavutil/attributes.h"
30
+#include "copy_block.h"
31
+#include "qpeldsp.h"
32
+
33
+#define BIT_DEPTH 8
34
+#include "hpel_template.c"
35
+#include "tpel_template.c"
36
+#include "qpel_template.c"
37
+
38
+#define QPEL_MC(r, OPNAME, RND, OP)                                           \
39
+static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src,       \
40
+                                            int dstStride, int srcStride,     \
41
+                                            int h)                            \
42
+{                                                                             \
43
+    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;                           \
44
+    int i;                                                                    \
45
+                                                                              \
46
+    for (i = 0; i < h; i++) {                                                 \
47
+        OP(dst[0], (src[0] + src[1]) * 20 - (src[0] + src[2]) * 6 + (src[1] + src[3]) * 3 - (src[2] + src[4])); \
48
+        OP(dst[1], (src[1] + src[2]) * 20 - (src[0] + src[3]) * 6 + (src[0] + src[4]) * 3 - (src[1] + src[5])); \
49
+        OP(dst[2], (src[2] + src[3]) * 20 - (src[1] + src[4]) * 6 + (src[0] + src[5]) * 3 - (src[0] + src[6])); \
50
+        OP(dst[3], (src[3] + src[4]) * 20 - (src[2] + src[5]) * 6 + (src[1] + src[6]) * 3 - (src[0] + src[7])); \
51
+        OP(dst[4], (src[4] + src[5]) * 20 - (src[3] + src[6]) * 6 + (src[2] + src[7]) * 3 - (src[1] + src[8])); \
52
+        OP(dst[5], (src[5] + src[6]) * 20 - (src[4] + src[7]) * 6 + (src[3] + src[8]) * 3 - (src[2] + src[8])); \
53
+        OP(dst[6], (src[6] + src[7]) * 20 - (src[5] + src[8]) * 6 + (src[4] + src[8]) * 3 - (src[3] + src[7])); \
54
+        OP(dst[7], (src[7] + src[8]) * 20 - (src[6] + src[8]) * 6 + (src[5] + src[7]) * 3 - (src[4] + src[6])); \
55
+        dst += dstStride;                                                     \
56
+        src += srcStride;                                                     \
57
+    }                                                                         \
58
+}                                                                             \
59
+                                                                              \
60
+static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src,       \
61
+                                            int dstStride, int srcStride)     \
62
+{                                                                             \
63
+    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;                           \
64
+    const int w = 8;                                                          \
65
+    int i;                                                                    \
66
+                                                                              \
67
+    for (i = 0; i < w; i++) {                                                 \
68
+        const int src0 = src[0 * srcStride];                                  \
69
+        const int src1 = src[1 * srcStride];                                  \
70
+        const int src2 = src[2 * srcStride];                                  \
71
+        const int src3 = src[3 * srcStride];                                  \
72
+        const int src4 = src[4 * srcStride];                                  \
73
+        const int src5 = src[5 * srcStride];                                  \
74
+        const int src6 = src[6 * srcStride];                                  \
75
+        const int src7 = src[7 * srcStride];                                  \
76
+        const int src8 = src[8 * srcStride];                                  \
77
+        OP(dst[0 * dstStride], (src0 + src1) * 20 - (src0 + src2) * 6 + (src1 + src3) * 3 - (src2 + src4)); \
78
+        OP(dst[1 * dstStride], (src1 + src2) * 20 - (src0 + src3) * 6 + (src0 + src4) * 3 - (src1 + src5)); \
79
+        OP(dst[2 * dstStride], (src2 + src3) * 20 - (src1 + src4) * 6 + (src0 + src5) * 3 - (src0 + src6)); \
80
+        OP(dst[3 * dstStride], (src3 + src4) * 20 - (src2 + src5) * 6 + (src1 + src6) * 3 - (src0 + src7)); \
81
+        OP(dst[4 * dstStride], (src4 + src5) * 20 - (src3 + src6) * 6 + (src2 + src7) * 3 - (src1 + src8)); \
82
+        OP(dst[5 * dstStride], (src5 + src6) * 20 - (src4 + src7) * 6 + (src3 + src8) * 3 - (src2 + src8)); \
83
+        OP(dst[6 * dstStride], (src6 + src7) * 20 - (src5 + src8) * 6 + (src4 + src8) * 3 - (src3 + src7)); \
84
+        OP(dst[7 * dstStride], (src7 + src8) * 20 - (src6 + src8) * 6 + (src5 + src7) * 3 - (src4 + src6)); \
85
+        dst++;                                                                \
86
+        src++;                                                                \
87
+    }                                                                         \
88
+}                                                                             \
89
+                                                                              \
90
+static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src,      \
91
+                                             int dstStride, int srcStride,    \
92
+                                             int h)                           \
93
+{                                                                             \
94
+    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;                           \
95
+    int i;                                                                    \
96
+                                                                              \
97
+    for (i = 0; i < h; i++) {                                                 \
98
+        OP(dst[0],  (src[0]  + src[1])  * 20 - (src[0]  + src[2])  * 6 + (src[1]  + src[3])  * 3 - (src[2]  + src[4]));  \
99
+        OP(dst[1],  (src[1]  + src[2])  * 20 - (src[0]  + src[3])  * 6 + (src[0]  + src[4])  * 3 - (src[1]  + src[5]));  \
100
+        OP(dst[2],  (src[2]  + src[3])  * 20 - (src[1]  + src[4])  * 6 + (src[0]  + src[5])  * 3 - (src[0]  + src[6]));  \
101
+        OP(dst[3],  (src[3]  + src[4])  * 20 - (src[2]  + src[5])  * 6 + (src[1]  + src[6])  * 3 - (src[0]  + src[7]));  \
102
+        OP(dst[4],  (src[4]  + src[5])  * 20 - (src[3]  + src[6])  * 6 + (src[2]  + src[7])  * 3 - (src[1]  + src[8]));  \
103
+        OP(dst[5],  (src[5]  + src[6])  * 20 - (src[4]  + src[7])  * 6 + (src[3]  + src[8])  * 3 - (src[2]  + src[9]));  \
104
+        OP(dst[6],  (src[6]  + src[7])  * 20 - (src[5]  + src[8])  * 6 + (src[4]  + src[9])  * 3 - (src[3]  + src[10])); \
105
+        OP(dst[7],  (src[7]  + src[8])  * 20 - (src[6]  + src[9])  * 6 + (src[5]  + src[10]) * 3 - (src[4]  + src[11])); \
106
+        OP(dst[8],  (src[8]  + src[9])  * 20 - (src[7]  + src[10]) * 6 + (src[6]  + src[11]) * 3 - (src[5]  + src[12])); \
107
+        OP(dst[9],  (src[9]  + src[10]) * 20 - (src[8]  + src[11]) * 6 + (src[7]  + src[12]) * 3 - (src[6]  + src[13])); \
108
+        OP(dst[10], (src[10] + src[11]) * 20 - (src[9]  + src[12]) * 6 + (src[8]  + src[13]) * 3 - (src[7]  + src[14])); \
109
+        OP(dst[11], (src[11] + src[12]) * 20 - (src[10] + src[13]) * 6 + (src[9]  + src[14]) * 3 - (src[8]  + src[15])); \
110
+        OP(dst[12], (src[12] + src[13]) * 20 - (src[11] + src[14]) * 6 + (src[10] + src[15]) * 3 - (src[9]  + src[16])); \
111
+        OP(dst[13], (src[13] + src[14]) * 20 - (src[12] + src[15]) * 6 + (src[11] + src[16]) * 3 - (src[10] + src[16])); \
112
+        OP(dst[14], (src[14] + src[15]) * 20 - (src[13] + src[16]) * 6 + (src[12] + src[16]) * 3 - (src[11] + src[15])); \
113
+        OP(dst[15], (src[15] + src[16]) * 20 - (src[14] + src[16]) * 6 + (src[13] + src[15]) * 3 - (src[12] + src[14])); \
114
+        dst += dstStride;                                                     \
115
+        src += srcStride;                                                     \
116
+    }                                                                         \
117
+}                                                                             \
118
+                                                                              \
119
+static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src,      \
120
+                                             int dstStride, int srcStride)    \
121
+{                                                                             \
122
+    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;                           \
123
+    const int w = 16;                                                         \
124
+    int i;                                                                    \
125
+                                                                              \
126
+    for (i = 0; i < w; i++) {                                                 \
127
+        const int src0  = src[0  * srcStride];                                \
128
+        const int src1  = src[1  * srcStride];                                \
129
+        const int src2  = src[2  * srcStride];                                \
130
+        const int src3  = src[3  * srcStride];                                \
131
+        const int src4  = src[4  * srcStride];                                \
132
+        const int src5  = src[5  * srcStride];                                \
133
+        const int src6  = src[6  * srcStride];                                \
134
+        const int src7  = src[7  * srcStride];                                \
135
+        const int src8  = src[8  * srcStride];                                \
136
+        const int src9  = src[9  * srcStride];                                \
137
+        const int src10 = src[10 * srcStride];                                \
138
+        const int src11 = src[11 * srcStride];                                \
139
+        const int src12 = src[12 * srcStride];                                \
140
+        const int src13 = src[13 * srcStride];                                \
141
+        const int src14 = src[14 * srcStride];                                \
142
+        const int src15 = src[15 * srcStride];                                \
143
+        const int src16 = src[16 * srcStride];                                \
144
+        OP(dst[0  * dstStride], (src0  + src1)  * 20 - (src0  + src2)  * 6 + (src1  + src3)  * 3 - (src2  + src4));  \
145
+        OP(dst[1  * dstStride], (src1  + src2)  * 20 - (src0  + src3)  * 6 + (src0  + src4)  * 3 - (src1  + src5));  \
146
+        OP(dst[2  * dstStride], (src2  + src3)  * 20 - (src1  + src4)  * 6 + (src0  + src5)  * 3 - (src0  + src6));  \
147
+        OP(dst[3  * dstStride], (src3  + src4)  * 20 - (src2  + src5)  * 6 + (src1  + src6)  * 3 - (src0  + src7));  \
148
+        OP(dst[4  * dstStride], (src4  + src5)  * 20 - (src3  + src6)  * 6 + (src2  + src7)  * 3 - (src1  + src8));  \
149
+        OP(dst[5  * dstStride], (src5  + src6)  * 20 - (src4  + src7)  * 6 + (src3  + src8)  * 3 - (src2  + src9));  \
150
+        OP(dst[6  * dstStride], (src6  + src7)  * 20 - (src5  + src8)  * 6 + (src4  + src9)  * 3 - (src3  + src10)); \
151
+        OP(dst[7  * dstStride], (src7  + src8)  * 20 - (src6  + src9)  * 6 + (src5  + src10) * 3 - (src4  + src11)); \
152
+        OP(dst[8  * dstStride], (src8  + src9)  * 20 - (src7  + src10) * 6 + (src6  + src11) * 3 - (src5  + src12)); \
153
+        OP(dst[9  * dstStride], (src9  + src10) * 20 - (src8  + src11) * 6 + (src7  + src12) * 3 - (src6  + src13)); \
154
+        OP(dst[10 * dstStride], (src10 + src11) * 20 - (src9  + src12) * 6 + (src8  + src13) * 3 - (src7  + src14)); \
155
+        OP(dst[11 * dstStride], (src11 + src12) * 20 - (src10 + src13) * 6 + (src9  + src14) * 3 - (src8  + src15)); \
156
+        OP(dst[12 * dstStride], (src12 + src13) * 20 - (src11 + src14) * 6 + (src10 + src15) * 3 - (src9  + src16)); \
157
+        OP(dst[13 * dstStride], (src13 + src14) * 20 - (src12 + src15) * 6 + (src11 + src16) * 3 - (src10 + src16)); \
158
+        OP(dst[14 * dstStride], (src14 + src15) * 20 - (src13 + src16) * 6 + (src12 + src16) * 3 - (src11 + src15)); \
159
+        OP(dst[15 * dstStride], (src15 + src16) * 20 - (src14 + src16) * 6 + (src13 + src15) * 3 - (src12 + src14)); \
160
+        dst++;                                                                \
161
+        src++;                                                                \
162
+    }                                                                         \
163
+}                                                                             \
164
+                                                                              \
165
+static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src,                \
166
+                                   ptrdiff_t stride)                          \
167
+{                                                                             \
168
+    uint8_t half[64];                                                         \
169
+                                                                              \
170
+    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);             \
171
+    OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);             \
172
+}                                                                             \
173
+                                                                              \
174
+static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src,                \
175
+                                   ptrdiff_t stride)                          \
176
+{                                                                             \
177
+    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);             \
178
+}                                                                             \
179
+                                                                              \
180
+static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src,                \
181
+                                   ptrdiff_t stride)                          \
182
+{                                                                             \
183
+    uint8_t half[64];                                                         \
184
+                                                                              \
185
+    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);             \
186
+    OPNAME ## pixels8_l2_8(dst, src + 1, half, stride, stride, 8, 8);         \
187
+}                                                                             \
188
+                                                                              \
189
+static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src,                \
190
+                                   ptrdiff_t stride)                          \
191
+{ /* Combines full with the v_lowpass of full (staged in half) into dst. */   \
192
+    uint8_t full[16 * 9];                                                     \
193
+    uint8_t half[64];                                                         \
194
+    /* full is a local copy of src made by copy_block9 (16, stride, 9). */    \
195
+    copy_block9(full, src, 16, stride, 9);                                    \
196
+    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);                   \
197
+    OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);                \
198
+}                                                                             \
199
+                                                                              \
200
+static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src,                \
200
+                                   ptrdiff_t stride)                          \
201
+{ /* Copies src into full, then v_lowpass writes from full into dst. */       \
202
+    uint8_t full[16 * 9];                                                     \
203
+    /* full is passed to the filter with 16 instead of stride. */             \
204
+    copy_block9(full, src, 16, stride, 9);                                    \
205
+    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);                   \
206
+}                                                                             \
208
+                                                                              \
209
+static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src,                \
209
+                                   ptrdiff_t stride)                          \
210
+{ /* Combines full + 16 with the v_lowpass of full (staged in half). */       \
211
+    uint8_t full[16 * 9];                                                     \
212
+    uint8_t half[64];                                                         \
213
+    /* full + 16 is presumably one row down in the copy (pitch 16). */        \
214
+    copy_block9(full, src, 16, stride, 9);                                    \
215
+    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);                   \
216
+    OPNAME ## pixels8_l2_8(dst, full + 16, half, stride, 16, 8, 8);           \
217
+}                                                                             \
219
+                                                                              \
220
+void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src,            \
220
+                                       ptrdiff_t stride)                      \
221
+{ /* Feeds full, halfH, halfV and halfHV to OPNAME pixels8_l4_8 for dst. */   \
222
+    uint8_t full[16 * 9];                                                     \
223
+    uint8_t halfH[72];                                                        \
224
+    uint8_t halfV[64];                                                        \
225
+    uint8_t halfHV[64];                                                       \
226
+    /* halfH/halfV are filtered from full; halfHV is filtered from halfH. */  \
227
+    copy_block9(full, src, 16, stride, 9);                                    \
228
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);               \
229
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);                  \
230
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);                 \
231
+    OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV,                   \
232
+                           stride, 16, 8, 8, 8, 8);                           \
233
+}                                                                             \
235
+                                                                              \
236
+static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src,                \
236
+                                   ptrdiff_t stride)                          \
237
+{ /* Merges full into halfH in place, then combines halfH with halfHV. */     \
238
+    uint8_t full[16 * 9];                                                     \
239
+    uint8_t halfH[72];                                                        \
240
+    uint8_t halfHV[64];                                                       \
241
+    /* Order matters: halfHV is derived from the already merged halfH. */     \
242
+    copy_block9(full, src, 16, stride, 9);                                    \
243
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);               \
244
+    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);              \
245
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);                 \
246
+    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);              \
247
+}                                                                             \
249
+                                                                              \
250
+void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src,            \
250
+                                       ptrdiff_t stride)                      \
251
+{ /* Feeds full + 1, halfH, halfV and halfHV to OPNAME pixels8_l4_8. */       \
252
+    uint8_t full[16 * 9];                                                     \
253
+    uint8_t halfH[72];                                                        \
254
+    uint8_t halfV[64];                                                        \
255
+    uint8_t halfHV[64];                                                       \
256
+    /* halfV is filtered from full + 1; halfH is filtered from full. */       \
257
+    copy_block9(full, src, 16, stride, 9);                                    \
258
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);               \
259
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16);              \
260
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);                 \
261
+    OPNAME ## pixels8_l4_8(dst, full + 1, halfH, halfV, halfHV,               \
262
+                           stride, 16, 8, 8, 8, 8);                           \
263
+}                                                                             \
265
+                                                                              \
266
+static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src,                \
266
+                                   ptrdiff_t stride)                          \
267
+{ /* Merges full + 1 into halfH in place, then combines it with halfHV. */    \
268
+    uint8_t full[16 * 9];                                                     \
269
+    uint8_t halfH[72];                                                        \
270
+    uint8_t halfHV[64];                                                       \
271
+    /* Order matters: halfHV is derived from the already merged halfH. */     \
272
+    copy_block9(full, src, 16, stride, 9);                                    \
273
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);               \
274
+    put ## RND ## pixels8_l2_8(halfH, halfH, full + 1, 8, 8, 16, 9);          \
275
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);                 \
276
+    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);              \
277
+}                                                                             \
279
+                                                                              \
280
+void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src,            \
280
+                                       ptrdiff_t stride)                      \
281
+{ /* Feeds full + 16, halfH + 8, halfV and halfHV to pixels8_l4_8. */         \
282
+    uint8_t full[16 * 9];                                                     \
283
+    uint8_t halfH[72];                                                        \
284
+    uint8_t halfV[64];                                                        \
285
+    uint8_t halfHV[64];                                                       \
286
+    /* full + 16 and halfH + 8 are presumably both one row down. */           \
287
+    copy_block9(full, src, 16, stride, 9);                                    \
288
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);               \
289
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);                  \
290
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);                 \
291
+    OPNAME ## pixels8_l4_8(dst, full + 16, halfH + 8, halfV, halfHV,          \
292
+                           stride, 16, 8, 8, 8, 8);                           \
293
+}                                                                             \
295
+                                                                              \
296
+static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src,                \
296
+                                   ptrdiff_t stride)                          \
297
+{ /* Merges full into halfH in place, then combines halfH + 8 and halfHV. */  \
298
+    uint8_t full[16 * 9];                                                     \
299
+    uint8_t halfH[72];                                                        \
300
+    uint8_t halfHV[64];                                                       \
301
+    /* Order matters: halfHV is derived from the already merged halfH. */     \
302
+    copy_block9(full, src, 16, stride, 9);                                    \
303
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);               \
304
+    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);              \
305
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);                 \
306
+    OPNAME ## pixels8_l2_8(dst, halfH + 8, halfHV, stride, 8, 8, 8);          \
307
+}                                                                             \
309
+                                                                              \
310
+void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src,            \
310
+                                       ptrdiff_t stride)                      \
311
+{ /* Feeds full + 17, halfH + 8, halfV and halfHV to pixels8_l4_8. */         \
312
+    uint8_t full[16 * 9];                                                     \
313
+    uint8_t halfH[72];                                                        \
314
+    uint8_t halfV[64];                                                        \
315
+    uint8_t halfHV[64];                                                       \
316
+    /* halfV is filtered from full + 1; halfH is filtered from full. */       \
317
+    copy_block9(full, src, 16, stride, 9);                                    \
318
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);               \
319
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16);              \
320
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);                 \
321
+    OPNAME ## pixels8_l4_8(dst, full + 17, halfH + 8, halfV, halfHV,          \
322
+                           stride, 16, 8, 8, 8, 8);                           \
323
+}                                                                             \
325
+                                                                              \
326
+static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src,                \
326
+                                   ptrdiff_t stride)                          \
327
+{ /* Merges full + 1 into halfH, then combines halfH + 8 with halfHV. */      \
328
+    uint8_t full[16 * 9];                                                     \
329
+    uint8_t halfH[72];                                                        \
330
+    uint8_t halfHV[64];                                                       \
331
+    /* Order matters: halfHV is derived from the already merged halfH. */     \
332
+    copy_block9(full, src, 16, stride, 9);                                    \
333
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);               \
334
+    put ## RND ## pixels8_l2_8(halfH, halfH, full + 1, 8, 8, 16, 9);          \
335
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);                 \
336
+    OPNAME ## pixels8_l2_8(dst, halfH + 8, halfHV, stride, 8, 8, 8);          \
337
+}                                                                             \
339
+                                                                              \
340
+static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src,                \
340
+                                   ptrdiff_t stride)                          \
341
+{ /* h_lowpass of src goes to halfH, v_lowpass of halfH to halfHV. */         \
342
+    uint8_t halfH[72];                                                        \
343
+    uint8_t halfHV[64];                                                       \
344
+    /* dst receives the combination of halfH and halfHV. */                   \
345
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);            \
346
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);                 \
347
+    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);              \
348
+}                                                                             \
350
+                                                                              \
351
+static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src,                \
351
+                                   ptrdiff_t stride)                          \
352
+{ /* h_lowpass of src goes to halfH, v_lowpass of halfH to halfHV. */         \
353
+    uint8_t halfH[72];                                                        \
354
+    uint8_t halfHV[64];                                                       \
355
+    /* dst receives the combination of halfH + 8 and halfHV. */               \
356
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);            \
357
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);                 \
358
+    OPNAME ## pixels8_l2_8(dst, halfH + 8, halfHV, stride, 8, 8, 8);          \
359
+}                                                                             \
361
+                                                                              \
362
+void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src,            \
362
+                                       ptrdiff_t stride)                      \
363
+{ /* Combines halfV (from full) with halfHV (from halfH) into dst. */         \
364
+    uint8_t full[16 * 9];                                                     \
365
+    uint8_t halfH[72];                                                        \
366
+    uint8_t halfV[64];                                                        \
367
+    uint8_t halfHV[64];                                                       \
368
+    /* halfH is only an intermediate; it is not passed to the combine. */     \
369
+    copy_block9(full, src, 16, stride, 9);                                    \
370
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);               \
371
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);                  \
372
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);                 \
373
+    OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);              \
374
+}                                                                             \
376
+                                                                              \
377
+static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src,                \
377
+                                   ptrdiff_t stride)                          \
378
+{ /* Merges full into halfH in place, then v_lowpass writes dst directly. */  \
379
+    uint8_t full[16 * 9];                                                     \
380
+    uint8_t halfH[72];                                                        \
381
+    /* No halfHV buffer: the last filter pass is the OPNAME variant. */       \
382
+    copy_block9(full, src, 16, stride, 9);                                    \
383
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);               \
384
+    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);              \
385
+    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);                   \
386
+}                                                                             \
388
+                                                                              \
389
+void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src,            \
389
+                                       ptrdiff_t stride)                      \
390
+{ /* Combines halfV (from full + 1) with halfHV (from halfH) into dst. */     \
391
+    uint8_t full[16 * 9];                                                     \
392
+    uint8_t halfH[72];                                                        \
393
+    uint8_t halfV[64];                                                        \
394
+    uint8_t halfHV[64];                                                       \
395
+    /* halfH is only an intermediate; it is not passed to the combine. */     \
396
+    copy_block9(full, src, 16, stride, 9);                                    \
397
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);               \
398
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full + 1, 8, 16);              \
399
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);                 \
400
+    OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);              \
401
+}                                                                             \
403
+                                                                              \
404
+static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src,                \
404
+                                   ptrdiff_t stride)                          \
405
+{ /* Merges full + 1 into halfH in place, then v_lowpass writes dst. */       \
406
+    uint8_t full[16 * 9];                                                     \
407
+    uint8_t halfH[72];                                                        \
408
+    /* No halfHV buffer: the last filter pass is the OPNAME variant. */       \
409
+    copy_block9(full, src, 16, stride, 9);                                    \
410
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);               \
411
+    put ## RND ## pixels8_l2_8(halfH, halfH, full + 1, 8, 8, 16, 9);          \
412
+    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);                   \
413
+}                                                                             \
415
+                                                                              \
416
+static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src,                \
416
+                                   ptrdiff_t stride)                          \
417
+{ /* h_lowpass of src goes to halfH, then v_lowpass of halfH to dst. */       \
418
+    uint8_t halfH[72];                                                        \
419
+    /* halfH is 72 bytes; the h_lowpass call is given 9 as its last arg. */   \
420
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);            \
421
+    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);                   \
422
+}                                                                             \
424
+                                                                              \
425
+static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src,               \
425
+                                    ptrdiff_t stride)                         \
426
+{ /* Combines src with the h_lowpass of src (staged in half) into dst. */     \
427
+    uint8_t half[256];                                                        \
428
+    /* half appears to use a row pitch of 16 (confirm against the helper). */ \
429
+    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);          \
430
+    OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);          \
431
+}                                                                             \
433
+                                                                              \
434
+static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src,               \
434
+                                    ptrdiff_t stride)                         \
435
+{ /* Runs the OPNAME h_lowpass helper from src straight into dst. */          \
436
+    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);           \
437
+}                                                                             \
439
+                                                                              \
440
+static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src,               \
440
+                                    ptrdiff_t stride)                         \
441
+{ /* Combines src + 1 with the h_lowpass of src (staged in half) into dst. */ \
442
+    uint8_t half[256];                                                        \
443
+    /* half appears to use a row pitch of 16 (confirm against the helper). */ \
444
+    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);          \
445
+    OPNAME ## pixels16_l2_8(dst, src + 1, half, stride, stride, 16, 16);      \
446
+}                                                                             \
448
+                                                                              \
449
+static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src,               \
449
+                                    ptrdiff_t stride)                         \
450
+{ /* Combines full with the v_lowpass of full (staged in half) into dst. */   \
451
+    uint8_t full[24 * 17];                                                    \
452
+    uint8_t half[256];                                                        \
453
+    /* full is a local copy of src made by copy_block17 (24, stride, 17). */  \
454
+    copy_block17(full, src, 24, stride, 17);                                  \
455
+    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);                 \
456
+    OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);             \
457
+}                                                                             \
459
+                                                                              \
460
+static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src,               \
460
+                                    ptrdiff_t stride)                         \
461
+{ /* Copies src into full, then v_lowpass writes from full into dst. */       \
462
+    uint8_t full[24 * 17];                                                    \
463
+    /* full is passed to the filter with 24 instead of stride. */             \
464
+    copy_block17(full, src, 24, stride, 17);                                  \
465
+    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);                  \
466
+}                                                                             \
468
+                                                                              \
469
+static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src,               \
469
+                                    ptrdiff_t stride)                         \
470
+{ /* Combines full + 24 with the v_lowpass of full (staged in half). */       \
471
+    uint8_t full[24 * 17];                                                    \
472
+    uint8_t half[256];                                                        \
473
+    /* full + 24 is presumably one row down in the copy (pitch 24). */        \
474
+    copy_block17(full, src, 24, stride, 17);                                  \
475
+    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);                 \
476
+    OPNAME ## pixels16_l2_8(dst, full + 24, half, stride, 24, 16, 16);        \
477
+}                                                                             \
479
+                                                                              \
480
+void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src,           \
480
+                                        ptrdiff_t stride)                     \
481
+{ /* Feeds full, halfH, halfV and halfHV to OPNAME pixels16_l4_8 for dst. */  \
482
+    uint8_t full[24 * 17];                                                    \
483
+    uint8_t halfH[272];                                                       \
484
+    uint8_t halfV[256];                                                       \
485
+    uint8_t halfHV[256];                                                      \
486
+    /* halfH/halfV are filtered from full; halfHV is filtered from halfH. */  \
487
+    copy_block17(full, src, 24, stride, 17);                                  \
488
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);            \
489
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);                \
490
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);              \
491
+    OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV,                  \
492
+                            stride, 24, 16, 16, 16, 16);                      \
493
+}                                                                             \
495
+                                                                              \
496
+static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src,               \
496
+                                    ptrdiff_t stride)                         \
497
+{ /* Merges full into halfH in place, then combines halfH with halfHV. */     \
498
+    uint8_t full[24 * 17];                                                    \
499
+    uint8_t halfH[272];                                                       \
500
+    uint8_t halfHV[256];                                                      \
501
+    /* Order matters: halfHV is derived from the already merged halfH. */     \
502
+    copy_block17(full, src, 24, stride, 17);                                  \
503
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);            \
504
+    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);          \
505
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);              \
506
+    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);          \
507
+}                                                                             \
509
+                                                                              \
510
+void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src,           \
510
+                                        ptrdiff_t stride)                     \
511
+{ /* Feeds full + 1, halfH, halfV and halfHV to OPNAME pixels16_l4_8. */      \
512
+    uint8_t full[24 * 17];                                                    \
513
+    uint8_t halfH[272];                                                       \
514
+    uint8_t halfV[256];                                                       \
515
+    uint8_t halfHV[256];                                                      \
516
+    /* halfV is filtered from full + 1; halfH is filtered from full. */       \
517
+    copy_block17(full, src, 24, stride, 17);                                  \
518
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);            \
519
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24);            \
520
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);              \
521
+    OPNAME ## pixels16_l4_8(dst, full + 1, halfH, halfV, halfHV,              \
522
+                            stride, 24, 16, 16, 16, 16);                      \
523
+}                                                                             \
525
+                                                                              \
526
+static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src,               \
526
+                                    ptrdiff_t stride)                         \
527
+{ /* Merges full + 1 into halfH in place, then combines it with halfHV. */    \
528
+    uint8_t full[24 * 17];                                                    \
529
+    uint8_t halfH[272];                                                       \
530
+    uint8_t halfHV[256];                                                      \
531
+    /* Order matters: halfHV is derived from the already merged halfH. */     \
532
+    copy_block17(full, src, 24, stride, 17);                                  \
533
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);            \
534
+    put ## RND ## pixels16_l2_8(halfH, halfH, full + 1, 16, 16, 24, 17);      \
535
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);              \
536
+    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);          \
537
+}                                                                             \
539
+                                                                              \
540
+void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src,           \
540
+                                        ptrdiff_t stride)                     \
541
+{ /* Feeds full + 24, halfH + 16, halfV and halfHV to pixels16_l4_8. */       \
542
+    uint8_t full[24 * 17];                                                    \
543
+    uint8_t halfH[272];                                                       \
544
+    uint8_t halfV[256];                                                       \
545
+    uint8_t halfHV[256];                                                      \
546
+    /* full + 24 and halfH + 16 are presumably both one row down. */          \
547
+    copy_block17(full, src, 24, stride, 17);                                  \
548
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);            \
549
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);                \
550
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);              \
551
+    OPNAME ## pixels16_l4_8(dst, full + 24, halfH + 16, halfV, halfHV,        \
552
+                            stride, 24, 16, 16, 16, 16);                      \
553
+}                                                                             \
555
+                                                                              \
556
+static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src,               \
556
+                                    ptrdiff_t stride)                         \
557
+{ /* Merges full into halfH in place, then combines halfH + 16 and halfHV. */ \
558
+    uint8_t full[24 * 17];                                                    \
559
+    uint8_t halfH[272];                                                       \
560
+    uint8_t halfHV[256];                                                      \
561
+    /* Order matters: halfHV is derived from the already merged halfH. */     \
562
+    copy_block17(full, src, 24, stride, 17);                                  \
563
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);            \
564
+    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);          \
565
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);              \
566
+    OPNAME ## pixels16_l2_8(dst, halfH + 16, halfHV, stride, 16, 16, 16);     \
567
+}                                                                             \
569
+                                                                              \
570
+void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src,           \
571
+                                        ptrdiff_t stride)                     \
572
+{                                                                             \
573
+    uint8_t full[24 * 17];                                                    \
574
+    uint8_t halfH[272];                                                       \
575
+    uint8_t halfV[256];                                                       \
576
+    uint8_t halfHV[256];                                                      \
577
+                                                                              \
578
+    copy_block17(full, src, 24, stride, 17);                                  \
579
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);            \
580
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24);            \
581
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);              \
582
+    OPNAME ## pixels16_l4_8(dst, full + 25, halfH + 16, halfV, halfHV,        \
583
+                            stride, 24, 16, 16, 16, 16);                      \
584
+}                                                                             \
585
+                                                                              \
586
+static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src,               \
587
+                                    ptrdiff_t stride)                         \
588
+{                                                                             \
589
+    uint8_t full[24 * 17];                                                    \
590
+    uint8_t halfH[272];                                                       \
591
+    uint8_t halfHV[256];                                                      \
592
+                                                                              \
593
+    copy_block17(full, src, 24, stride, 17);                                  \
594
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);            \
595
+    put ## RND ## pixels16_l2_8(halfH, halfH, full + 1, 16, 16, 24, 17);      \
596
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);              \
597
+    OPNAME ## pixels16_l2_8(dst, halfH + 16, halfHV, stride, 16, 16, 16);     \
598
+}                                                                             \
599
+                                                                              \
600
+static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src,               \
601
+                                    ptrdiff_t stride)                         \
602
+{                                                                             \
603
+    uint8_t halfH[272];                                                       \
604
+    uint8_t halfHV[256];                                                      \
605
+                                                                              \
606
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);         \
607
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);              \
608
+    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);          \
609
+}                                                                             \
610
+                                                                              \
611
+static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src,               \
612
+                                    ptrdiff_t stride)                         \
613
+{                                                                             \
614
+    uint8_t halfH[272];                                                       \
615
+    uint8_t halfHV[256];                                                      \
616
+                                                                              \
617
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);         \
618
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);              \
619
+    OPNAME ## pixels16_l2_8(dst, halfH + 16, halfHV, stride, 16, 16, 16);     \
620
+}                                                                             \
621
+                                                                              \
622
+void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src,           \
623
+                                        ptrdiff_t stride)                     \
624
+{                                                                             \
625
+    uint8_t full[24 * 17];                                                    \
626
+    uint8_t halfH[272];                                                       \
627
+    uint8_t halfV[256];                                                       \
628
+    uint8_t halfHV[256];                                                      \
629
+                                                                              \
630
+    copy_block17(full, src, 24, stride, 17);                                  \
631
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);            \
632
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);                \
633
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);              \
634
+    OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);          \
635
+}                                                                             \
636
+                                                                              \
637
+static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src,               \
638
+                                    ptrdiff_t stride)                         \
639
+{                                                                             \
640
+    uint8_t full[24 * 17];                                                    \
641
+    uint8_t halfH[272];                                                       \
642
+                                                                              \
643
+    copy_block17(full, src, 24, stride, 17);                                  \
644
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);            \
645
+    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);          \
646
+    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);                 \
647
+}                                                                             \
648
+                                                                              \
649
+void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src,           \
650
+                                        ptrdiff_t stride)                     \
651
+{                                                                             \
652
+    uint8_t full[24 * 17];                                                    \
653
+    uint8_t halfH[272];                                                       \
654
+    uint8_t halfV[256];                                                       \
655
+    uint8_t halfHV[256];                                                      \
656
+                                                                              \
657
+    copy_block17(full, src, 24, stride, 17);                                  \
658
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);            \
659
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full + 1, 16, 24);            \
660
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);              \
661
+    OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);          \
662
+}                                                                             \
663
+                                                                              \
664
+static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src,               \
665
+                                    ptrdiff_t stride)                         \
666
+{                                                                             \
667
+    uint8_t full[24 * 17];                                                    \
668
+    uint8_t halfH[272];                                                       \
669
+                                                                              \
670
+    copy_block17(full, src, 24, stride, 17);                                  \
671
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);            \
672
+    put ## RND ## pixels16_l2_8(halfH, halfH, full + 1, 16, 16, 24, 17);      \
673
+    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);                 \
674
+}                                                                             \
675
+                                                                              \
676
+static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src,               \
677
+                                    ptrdiff_t stride)                         \
678
+{                                                                             \
679
+    uint8_t halfH[272];                                                       \
680
+                                                                              \
681
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);         \
682
+    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);                 \
683
+}
684
+
685
/*
 * Review note: store operations passed to QPEL_MC as its last argument.
 *   op_put        stores cm[(b + 16) >> 5]  (divide by 32, bias 16)
 *   op_put_no_rnd stores cm[(b + 15) >> 5]  (same, bias 15)
 *   op_avg        averages that value with the current contents of `a`,
 *                 adding 1 before the final >> 1.
 * `cm` is not defined in these macros; presumably it is a clamping lookup
 * table in scope where QPEL_MC expands them -- TODO confirm.
 *
 * The second QPEL_MC argument is pasted in as the generated function-name
 * prefix (OPNAME); the third (RND) is pasted between `put` and the helper
 * name, so the put_ and avg_ families both call the put_* helpers while the
 * put_no_rnd_ family calls the put_no_rnd_* helpers. The op_* macros are
 * #undef'd immediately after the three expansions.
 */
+#define op_avg(a, b)        a = (((a) + cm[((b) + 16) >> 5] + 1) >> 1)
686
+#define op_put(a, b)        a = cm[((b) + 16) >> 5]
687
+#define op_put_no_rnd(a, b) a = cm[((b) + 15) >> 5]
688
+
689
+QPEL_MC(0, put_, _, op_put)
690
+QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
691
+QPEL_MC(0, avg_, _, op_avg)
692
+
693
+#undef op_avg
694
+#undef op_put
695
+#undef op_put_no_rnd
696
+
697
/*
 * Review note: exported wrapper with the (dst, src, stride) argument list;
 * forwards to put_pixels8_8_c() with a fixed last argument of 8 (presumably
 * the row count -- put_pixels8_8_c is defined outside this view).
 */
+void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
698
+{
699
+    put_pixels8_8_c(dst, src, stride, 8);
700
+}
701
+
702
/*
 * Review note: exported wrapper with the (dst, src, stride) argument list;
 * forwards to avg_pixels8_8_c() with a fixed last argument of 8 (presumably
 * the row count -- avg_pixels8_8_c is defined outside this view).
 */
+void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
703
+{
704
+    avg_pixels8_8_c(dst, src, stride, 8);
705
+}
706
+
707
/*
 * Review note: exported wrapper with the (dst, src, stride) argument list;
 * forwards to put_pixels16_8_c() with a fixed last argument of 16
 * (presumably the row count -- put_pixels16_8_c is defined outside this
 * view).
 */
+void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
708
+{
709
+    put_pixels16_8_c(dst, src, stride, 16);
710
+}
711
+
712
/*
 * Review note: exported wrapper with the (dst, src, stride) argument list;
 * forwards to avg_pixels16_8_c() with a fixed last argument of 16
 * (presumably the row count -- avg_pixels16_8_c is defined outside this
 * view).
 */
+void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
713
+{
714
+    avg_pixels16_8_c(dst, src, stride, 16);
715
+}
716
+
717
/*
 * Review note: the dspfunc() macro in ff_qpeldsp_init() builds the names
 * <prefix><size>_mc00_c by token pasting; these aliases map them onto the
 * exported pixel wrappers above. The put_no_rnd_ aliases point at the same
 * ff_put_pixels*_c functions as the put_ aliases.
 */
+#define put_qpel8_mc00_c         ff_put_pixels8x8_c
718
+#define avg_qpel8_mc00_c         ff_avg_pixels8x8_c
719
+#define put_qpel16_mc00_c        ff_put_pixels16x16_c
720
+#define avg_qpel16_mc00_c        ff_avg_pixels16x16_c
721
+#define put_no_rnd_qpel8_mc00_c  ff_put_pixels8x8_c
722
+#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
723
+
724
/*
 * Review note: exported entry point that passes all seven arguments through
 * unchanged to put_pixels8_l2_8(), which is defined outside this view. It
 * takes two source pointers, each with its own stride, plus a destination
 * stride and h.
 */
+void ff_put_pixels8_l2_8(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
725
+                         int dst_stride, int src_stride1, int src_stride2,
726
+                         int h)
727
+{
728
+    put_pixels8_l2_8(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);
729
+
730
+}
731
+
732
/*
 * Review note: fills all entries of the three function tables in *c
 * (put_qpel, put_no_rnd_qpel, avg_qpel) with the C implementations.
 *
 * dspfunc(PFX, IDX, NUM) assigns the 16 slots of c->PFX_pixels_tab[IDX];
 * the function named ..._mcXY_c is stored in slot X + 4 * Y
 * (mc00 -> 0, mc10 -> 1, ..., mc03 -> 12, ..., mc33 -> 15).
 * IDX 0 receives the qpel16 functions and IDX 1 the qpel8 functions.
 *
 * When ARCH_X86 is nonzero, ff_qpeldsp_init_x86() is called last on the same
 * context (presumably to replace entries with optimized versions -- its
 * body is not visible here). dspfunc is not #undef'd after use.
 */
+av_cold void ff_qpeldsp_init(QpelDSPContext *c)
733
+{
734
+#define dspfunc(PFX, IDX, NUM)                              \
735
+    c->PFX ## _pixels_tab[IDX][0]  = PFX ## NUM ## _mc00_c; \
736
+    c->PFX ## _pixels_tab[IDX][1]  = PFX ## NUM ## _mc10_c; \
737
+    c->PFX ## _pixels_tab[IDX][2]  = PFX ## NUM ## _mc20_c; \
738
+    c->PFX ## _pixels_tab[IDX][3]  = PFX ## NUM ## _mc30_c; \
739
+    c->PFX ## _pixels_tab[IDX][4]  = PFX ## NUM ## _mc01_c; \
740
+    c->PFX ## _pixels_tab[IDX][5]  = PFX ## NUM ## _mc11_c; \
741
+    c->PFX ## _pixels_tab[IDX][6]  = PFX ## NUM ## _mc21_c; \
742
+    c->PFX ## _pixels_tab[IDX][7]  = PFX ## NUM ## _mc31_c; \
743
+    c->PFX ## _pixels_tab[IDX][8]  = PFX ## NUM ## _mc02_c; \
744
+    c->PFX ## _pixels_tab[IDX][9]  = PFX ## NUM ## _mc12_c; \
745
+    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
746
+    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
747
+    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
748
+    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
749
+    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
750
+    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
751
+
752
+    dspfunc(put_qpel, 0, 16);
753
+    dspfunc(put_qpel, 1, 8);
754
+
755
+    dspfunc(put_no_rnd_qpel, 0, 16);
756
+    dspfunc(put_no_rnd_qpel, 1, 8);
757
+
758
+    dspfunc(avg_qpel, 0, 16);
759
+    dspfunc(avg_qpel, 1, 8);
760
+
761
+    if (ARCH_X86)
762
+        ff_qpeldsp_init_x86(c);
763
+}
0 764
new file mode 100644
... ...
@@ -0,0 +1,78 @@
0
+/*
1
+ * quarterpel DSP functions
2
+ *
3
+ * This file is part of Libav.
4
+ *
5
+ * Libav is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * Libav is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with Libav; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+/**
21
+ * @file
22
+ * quarterpel DSP functions
23
+ */
24
+
25
+#ifndef AVCODEC_QPELDSP_H
26
+#define AVCODEC_QPELDSP_H
27
+
28
+#include <stddef.h>
29
+#include <stdint.h>
30
+
31
/*
 * Review note: prototypes for the exported C pixel helpers. The four NxN
 * functions take the (dst, src, stride) argument list; qpeldsp.c aliases its
 * *_mc00_c table entries to them. ff_put_pixels8_l2_8() takes two sources,
 * each with its own stride, plus a destination stride and h.
 */
+void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
32
+void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
33
+void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
34
+void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride);
35
+
36
+void ff_put_pixels8_l2_8(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
37
+                         int dst_stride, int src_stride1, int src_stride2,
38
+                         int h);
39
+
40
/*
 * Review note: DEF_OLD_QPEL(name) declares three functions at once --
 * ff_put_<name>, ff_put_no_rnd_<name> and ff_avg_<name> -- all with the
 * (dst, src, stride) argument list. The expansion already ends in ';', so
 * the uses below carry no trailing semicolon. The matching definitions are
 * generated in qpeldsp.c by QPEL_MC as ff_ ## OPNAME ## qpelN_mcXY_old_c.
 * The macro is not #undef'd afterwards.
 */
+#define DEF_OLD_QPEL(name)                                                     \
41
+    void ff_put_        ## name(uint8_t *dst /* align width (8 or 16) */,      \
42
+                                uint8_t *src /* align 1 */, ptrdiff_t stride); \
43
+    void ff_put_no_rnd_ ## name(uint8_t *dst /* align width (8 or 16) */,      \
44
+                                uint8_t *src /* align 1 */, ptrdiff_t stride); \
45
+    void ff_avg_        ## name(uint8_t *dst /* align width (8 or 16) */,      \
46
+                                uint8_t *src /* align 1 */, ptrdiff_t stride);
47
+
48
+DEF_OLD_QPEL(qpel16_mc11_old_c)
49
+DEF_OLD_QPEL(qpel16_mc31_old_c)
50
+DEF_OLD_QPEL(qpel16_mc12_old_c)
51
+DEF_OLD_QPEL(qpel16_mc32_old_c)
52
+DEF_OLD_QPEL(qpel16_mc13_old_c)
53
+DEF_OLD_QPEL(qpel16_mc33_old_c)
54
+DEF_OLD_QPEL(qpel8_mc11_old_c)
55
+DEF_OLD_QPEL(qpel8_mc31_old_c)
56
+DEF_OLD_QPEL(qpel8_mc12_old_c)
57
+DEF_OLD_QPEL(qpel8_mc32_old_c)
58
+DEF_OLD_QPEL(qpel8_mc13_old_c)
59
+DEF_OLD_QPEL(qpel8_mc33_old_c)
60
+
61
/*
 * Review note: function-pointer type for every entry of the QpelDSPContext
 * tables declared below: (dst, src, stride), returning nothing.
 */
+typedef void (*qpel_mc_func)(uint8_t *dst /* align width (8 or 16) */,
62
+                             uint8_t *src /* align 1 */, ptrdiff_t stride);
63
+
64
+/**
65
+ * quarterpel DSP context
66
+ */
67
/*
 * Review note: three [2][16] tables of qpel_mc_func pointers. As filled by
 * ff_qpeldsp_init() in qpeldsp.c, the first index selects the block size
 * (0 = qpel16 functions, 1 = qpel8 functions) and the second index is
 * X + 4 * Y for the function named mcXY.
 */
+typedef struct QpelDSPContext {
68
+    qpel_mc_func put_qpel_pixels_tab[2][16];
69
+    qpel_mc_func avg_qpel_pixels_tab[2][16];
70
+    qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
71
+} QpelDSPContext;
72
+
73
/*
 * Review note: ff_qpeldsp_init() installs the C implementations into *c and
 * then, when ARCH_X86 is nonzero, calls ff_qpeldsp_init_x86() on the same
 * context.
 */
+void ff_qpeldsp_init(QpelDSPContext *c);
74
+
75
+void ff_qpeldsp_init_x86(QpelDSPContext *c);
76
+
77
+#endif /* AVCODEC_QPELDSP_H */
... ...
@@ -33,6 +33,7 @@
33 33
 #include "golomb.h"
34 34
 #include "internal.h"
35 35
 #include "mathops.h"
36
+#include "qpeldsp.h"
36 37
 #include "rectangle.h"
37 38
 #include "thread.h"
38 39
 
... ...
@@ -27,8 +27,8 @@
27 27
 #ifndef AVCODEC_RV34DSP_H
28 28
 #define AVCODEC_RV34DSP_H
29 29
 
30
-#include "dsputil.h"
31 30
 #include "h264chroma.h"
31
+#include "qpeldsp.h"
32 32
 
33 33
 typedef void (*rv40_weight_func)(uint8_t *dst/*align width (8 or 16)*/,
34 34
                                  uint8_t *src1/*align width (8 or 16)*/,
... ...
@@ -33,6 +33,7 @@
33 33
 #include "mpegvideo.h"
34 34
 #include "h263.h"
35 35
 #include "h264chroma.h"
36
+#include "qpeldsp.h"
36 37
 #include "vc1.h"
37 38
 #include "vc1data.h"
38 39
 #include "vc1acdata.h"
... ...
@@ -5603,6 +5604,7 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
5603 5603
     if (ff_vc1_init_common(v) < 0)
5604 5604
         return -1;
5605 5605
     ff_h264chroma_init(&v->h264chroma, 8);
5606
+    ff_qpeldsp_init(&s->qdsp);
5606 5607
     ff_vc1dsp_init(&v->vc1dsp);
5607 5608
 
5608 5609
     if (avctx->codec_id == AV_CODEC_ID_WMV3 || avctx->codec_id == AV_CODEC_ID_WMV3IMAGE) {
... ...
@@ -5971,8 +5973,8 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
5971 5971
         s->current_picture_ptr->f->repeat_pict = v->rptfrm * 2;
5972 5972
     }
5973 5973
 
5974
-    s->me.qpel_put = s->dsp.put_qpel_pixels_tab;
5975
-    s->me.qpel_avg = s->dsp.avg_qpel_pixels_tab;
5974
+    s->me.qpel_put = s->qdsp.put_qpel_pixels_tab;
5975
+    s->me.qpel_avg = s->qdsp.avg_qpel_pixels_tab;
5976 5976
 
5977 5977
     if (avctx->hwaccel) {
5978 5978
         if (avctx->hwaccel->start_frame(avctx, buf, buf_size) < 0)
... ...
@@ -26,8 +26,8 @@
26 26
  */
27 27
 
28 28
 #include "libavutil/common.h"
29
-#include "dsputil.h"
30 29
 #include "h264chroma.h"
30
+#include "qpeldsp.h"
31 31
 #include "vc1dsp.h"
32 32
 
33 33
 /* Apply overlap transform to horizontal edge */
... ...
@@ -21,7 +21,7 @@
21 21
 
22 22
 #include <stdint.h>
23 23
 
24
-#include "dsputil.h"
24
+#include "qpeldsp.h"
25 25
 
26 26
 typedef struct WMV2DSPContext {
27 27
     void (*idct_add)(uint8_t *dest, int line_size, int16_t *block);
... ...
@@ -20,6 +20,7 @@ OBJS-$(CONFIG_LPC)                     += x86/lpc.o
20 20
 OBJS-$(CONFIG_MPEGAUDIODSP)            += x86/mpegaudiodsp.o
21 21
 OBJS-$(CONFIG_MPEGVIDEO)               += x86/mpegvideo.o
22 22
 OBJS-$(CONFIG_MPEGVIDEOENC)            += x86/mpegvideoenc.o
23
+OBJS-$(CONFIG_QPELDSP)                 += x86/qpeldsp_init.o
23 24
 OBJS-$(CONFIG_VIDEODSP)                += x86/videodsp_init.o
24 25
 OBJS-$(CONFIG_VP3DSP)                  += x86/vp3dsp_init.o
25 26
 OBJS-$(CONFIG_XMM_CLOBBER_TEST)        += x86/w64xmmtest.o
... ...
@@ -44,13 +45,13 @@ OBJS-$(CONFIG_VP8_DECODER)             += x86/vp8dsp_init.o
44 44
 OBJS-$(CONFIG_VP9_DECODER)             += x86/vp9dsp_init.o
45 45
 
46 46
 MMX-OBJS-$(CONFIG_DSPUTIL)             += x86/dsputil_mmx.o             \
47
-                                          x86/fpel_mmx.o                \
48 47
                                           x86/idct_mmx_xvid.o           \
49 48
                                           x86/idct_sse2_xvid.o          \
50 49
                                           x86/simple_idct.o
51 50
 MMX-OBJS-$(CONFIG_HPELDSP)             += x86/fpel_mmx.o                \
52 51
                                           x86/hpeldsp_mmx.o
53 52
 MMX-OBJS-$(CONFIG_HUFFYUVDSP)          += x86/huffyuvdsp_mmx.o
53
+MMX-OBJS-$(CONFIG_QPELDSP)             += x86/fpel_mmx.o
54 54
 
55 55
 MMX-OBJS-$(CONFIG_SVQ1_ENCODER)        += x86/svq1enc_mmx.o
56 56
 MMX-OBJS-$(CONFIG_VC1_DECODER)         += x86/vc1dsp_mmx.o
... ...
@@ -61,10 +62,7 @@ YASM-OBJS                              += x86/deinterlace.o             \
61 61
 YASM-OBJS-$(CONFIG_AC3DSP)             += x86/ac3dsp.o
62 62
 YASM-OBJS-$(CONFIG_DCT)                += x86/dct32.o
63 63
 YASM-OBJS-$(CONFIG_DNXHD_ENCODER)      += x86/dnxhdenc.o
64
-YASM-OBJS-$(CONFIG_DSPUTIL)            += x86/dsputil.o                 \
65
-                                          x86/fpel.o                    \
66
-                                          x86/mpeg4qpel.o               \
67
-                                          x86/qpel.o
64
+YASM-OBJS-$(CONFIG_DSPUTIL)            += x86/dsputil.o
68 65
 YASM-OBJS-$(CONFIG_ENCODERS)           += x86/dsputilenc.o
69 66
 YASM-OBJS-$(CONFIG_FFT)                += x86/fft.o
70 67
 YASM-OBJS-$(CONFIG_H263DSP)            += x86/h263_loopfilter.o
... ...
@@ -86,6 +84,9 @@ YASM-OBJS-$(CONFIG_HPELDSP)            += x86/fpel.o                    \
86 86
                                           x86/hpeldsp.o
87 87
 YASM-OBJS-$(CONFIG_HUFFYUVDSP)         += x86/huffyuvdsp.o
88 88
 YASM-OBJS-$(CONFIG_MPEGAUDIODSP)       += x86/imdct36.o
89
+YASM-OBJS-$(CONFIG_QPELDSP)            += x86/qpeldsp.o                 \
90
+                                          x86/fpel.o                    \
91
+                                          x86/qpel.o
89 92
 YASM-OBJS-$(CONFIG_VIDEODSP)           += x86/videodsp.o
90 93
 YASM-OBJS-$(CONFIG_VP3DSP)             += x86/vp3dsp.o
91 94
 
... ...
@@ -23,55 +23,11 @@
23 23
 #include "libavutil/x86/cpu.h"
24 24
 #include "libavcodec/avcodec.h"
25 25
 #include "libavcodec/dsputil.h"
26
-#include "libavcodec/pixels.h"
27 26
 #include "libavcodec/simple_idct.h"
28 27
 #include "libavcodec/version.h"
29 28
 #include "dsputil_x86.h"
30
-#include "fpel.h"
31 29
 #include "idct_xvid.h"
32 30
 
33
-void ff_put_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
34
-                              int dstStride, int src1Stride, int h);
35
-void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1,
36
-                                     uint8_t *src2, int dstStride,
37
-                                     int src1Stride, int h);
38
-void ff_avg_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
39
-                              int dstStride, int src1Stride, int h);
40
-void ff_put_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
41
-                               int dstStride, int src1Stride, int h);
42
-void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
43
-                               int dstStride, int src1Stride, int h);
44
-void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
45
-                                      int dstStride, int src1Stride, int h);
46
-void ff_put_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
47
-                                          int dstStride, int srcStride, int h);
48
-void ff_avg_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
49
-                                          int dstStride, int srcStride, int h);
50
-void ff_put_no_rnd_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
51
-                                                 int dstStride, int srcStride,
52
-                                                 int h);
53
-void ff_put_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
54
-                                         int dstStride, int srcStride, int h);
55
-void ff_avg_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
56
-                                         int dstStride, int srcStride, int h);
57
-void ff_put_no_rnd_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
58
-                                                int dstStride, int srcStride,
59
-                                                int h);
60
-void ff_put_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
61
-                                          int dstStride, int srcStride);
62
-void ff_avg_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
63
-                                          int dstStride, int srcStride);
64
-void ff_put_no_rnd_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
65
-                                                 int dstStride, int srcStride);
66
-void ff_put_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
67
-                                         int dstStride, int srcStride);
68
-void ff_avg_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
69
-                                         int dstStride, int srcStride);
70
-void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
71
-                                                int dstStride, int srcStride);
72
-#define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmxext
73
-#define ff_put_no_rnd_pixels8_mmxext ff_put_pixels8_mmxext
74
-
75 31
 int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2,
76 32
                                       int order);
77 33
 int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2,
... ...
@@ -89,418 +45,6 @@ void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src,
89 89
 void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src,
90 90
                                int32_t min, int32_t max, unsigned int len);
91 91
 
92
-#if HAVE_YASM
93
-
94
-CALL_2X_PIXELS(ff_avg_pixels16_mmxext, ff_avg_pixels8_mmxext, 8)
95
-CALL_2X_PIXELS(ff_put_pixels16_mmxext, ff_put_pixels8_mmxext, 8)
96
-
97
-#define QPEL_OP(OPNAME, RND, MMX)                                       \
98
-static void OPNAME ## qpel8_mc00_ ## MMX(uint8_t *dst, uint8_t *src,    \
99
-                                         ptrdiff_t stride)              \
100
-{                                                                       \
101
-    ff_ ## OPNAME ## pixels8_ ## MMX(dst, src, stride, 8);              \
102
-}                                                                       \
103
-                                                                        \
104
-static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src,    \
105
-                                         ptrdiff_t stride)              \
106
-{                                                                       \
107
-    uint64_t temp[8];                                                   \
108
-    uint8_t *const half = (uint8_t *) temp;                             \
109
-    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8,        \
110
-                                                   stride, 8);          \
111
-    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half,                 \
112
-                                        stride, stride, 8);             \
113
-}                                                                       \
114
-                                                                        \
115
-static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, uint8_t *src,    \
116
-                                         ptrdiff_t stride)              \
117
-{                                                                       \
118
-    ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride,    \
119
-                                                   stride, 8);          \
120
-}                                                                       \
121
-                                                                        \
122
-static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src,    \
123
-                                         ptrdiff_t stride)              \
124
-{                                                                       \
125
-    uint64_t temp[8];                                                   \
126
-    uint8_t *const half = (uint8_t *) temp;                             \
127
-    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8,        \
128
-                                                   stride, 8);          \
129
-    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + 1, half, stride,     \
130
-                                        stride, 8);                     \
131
-}                                                                       \
132
-                                                                        \
133
-static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src,    \
134
-                                         ptrdiff_t stride)              \
135
-{                                                                       \
136
-    uint64_t temp[8];                                                   \
137
-    uint8_t *const half = (uint8_t *) temp;                             \
138
-    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src,           \
139
-                                                   8, stride);          \
140
-    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half,                 \
141
-                                        stride, stride, 8);             \
142
-}                                                                       \
143
-                                                                        \
144
-static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, uint8_t *src,    \
145
-                                         ptrdiff_t stride)              \
146
-{                                                                       \
147
-    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src,            \
148
-                                                   stride, stride);     \
149
-}                                                                       \
150
-                                                                        \
151
-static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src,    \
152
-                                         ptrdiff_t stride)              \
153
-{                                                                       \
154
-    uint64_t temp[8];                                                   \
155
-    uint8_t *const half = (uint8_t *) temp;                             \
156
-    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src,           \
157
-                                                   8, stride);          \
158
-    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + stride, half, stride,\
159
-                                        stride, 8);                     \
160
-}                                                                       \
161
-                                                                        \
162
-static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src,    \
163
-                                         ptrdiff_t stride)              \
164
-{                                                                       \
165
-    uint64_t half[8 + 9];                                               \
166
-    uint8_t *const halfH  = (uint8_t *) half + 64;                      \
167
-    uint8_t *const halfHV = (uint8_t *) half;                           \
168
-    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
169
-                                                   stride, 9);          \
170
-    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8,           \
171
-                                        stride, 9);                     \
172
-    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
173
-    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV,             \
174
-                                        stride, 8, 8);                  \
175
-}                                                                       \
176
-                                                                        \
177
-static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src,    \
178
-                                         ptrdiff_t stride)              \
179
-{                                                                       \
180
-    uint64_t half[8 + 9];                                               \
181
-    uint8_t *const halfH  = (uint8_t *) half + 64;                      \
182
-    uint8_t *const halfHV = (uint8_t *) half;                           \
183
-    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
184
-                                                   stride, 9);          \
185
-    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8,       \
186
-                                        stride, 9);                     \
187
-    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
188
-    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV,             \
189
-                                        stride, 8, 8);                  \
190
-}                                                                       \
191
-                                                                        \
192
-static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src,    \
193
-                                         ptrdiff_t stride)              \
194
-{                                                                       \
195
-    uint64_t half[8 + 9];                                               \
196
-    uint8_t *const halfH  = (uint8_t *) half + 64;                      \
197
-    uint8_t *const halfHV = (uint8_t *) half;                           \
198
-    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
199
-                                                   stride, 9);          \
200
-    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8,           \
201
-                                        stride, 9);                     \
202
-    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
203
-    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV,         \
204
-                                        stride, 8, 8);                  \
205
-}                                                                       \
206
-                                                                        \
207
-static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src,    \
208
-                                         ptrdiff_t stride)              \
209
-{                                                                       \
210
-    uint64_t half[8 + 9];                                               \
211
-    uint8_t *const halfH  = (uint8_t *) half + 64;                      \
212
-    uint8_t *const halfHV = (uint8_t *) half;                           \
213
-    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
214
-                                                   stride, 9);          \
215
-    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8,       \
216
-                                        stride, 9);                     \
217
-    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
218
-    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV,         \
219
-                                        stride, 8, 8);                  \
220
-}                                                                       \
221
-                                                                        \
222
-static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src,    \
223
-                                         ptrdiff_t stride)              \
224
-{                                                                       \
225
-    uint64_t half[8 + 9];                                               \
226
-    uint8_t *const halfH  = (uint8_t *) half + 64;                      \
227
-    uint8_t *const halfHV = (uint8_t *) half;                           \
228
-    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
229
-                                                   stride, 9);          \
230
-    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
231
-    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV,             \
232
-                                        stride, 8, 8);                  \
233
-}                                                                       \
234
-                                                                        \
235
-static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src,    \
236
-                                         ptrdiff_t stride)              \
237
-{                                                                       \
238
-    uint64_t half[8 + 9];                                               \
239
-    uint8_t *const halfH  = (uint8_t *) half + 64;                      \
240
-    uint8_t *const halfHV = (uint8_t *) half;                           \
241
-    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
242
-                                                   stride, 9);          \
243
-    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
244
-    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV,         \
245
-                                        stride, 8, 8);                  \
246
-}                                                                       \
247
-                                                                        \
248
-static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src,    \
249
-                                         ptrdiff_t stride)              \
250
-{                                                                       \
251
-    uint64_t half[8 + 9];                                               \
252
-    uint8_t *const halfH = (uint8_t *) half;                            \
253
-    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
254
-                                                   stride, 9);          \
255
-    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH,              \
256
-                                        8, stride, 9);                  \
257
-    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH,          \
258
-                                                   stride, 8);          \
259
-}                                                                       \
260
-                                                                        \
261
-static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src,    \
262
-                                         ptrdiff_t stride)              \
263
-{                                                                       \
264
-    uint64_t half[8 + 9];                                               \
265
-    uint8_t *const halfH = (uint8_t *) half;                            \
266
-    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
267
-                                                   stride, 9);          \
268
-    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8,       \
269
-                                        stride, 9);                     \
270
-    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH,          \
271
-                                                   stride, 8);          \
272
-}                                                                       \
273
-                                                                        \
274
-static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src,    \
275
-                                         ptrdiff_t stride)              \
276
-{                                                                       \
277
-    uint64_t half[9];                                                   \
278
-    uint8_t *const halfH = (uint8_t *) half;                            \
279
-    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
280
-                                                   stride, 9);          \
281
-    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH,          \
282
-                                                   stride, 8);          \
283
-}                                                                       \
284
-                                                                        \
285
-static void OPNAME ## qpel16_mc00_ ## MMX(uint8_t *dst, uint8_t *src,   \
286
-                                          ptrdiff_t stride)             \
287
-{                                                                       \
288
-    ff_ ## OPNAME ## pixels16_ ## MMX(dst, src, stride, 16);            \
289
-}                                                                       \
290
-                                                                        \
291
-static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src,   \
292
-                                          ptrdiff_t stride)             \
293
-{                                                                       \
294
-    uint64_t temp[32];                                                  \
295
-    uint8_t *const half = (uint8_t *) temp;                             \
296
-    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16,      \
297
-                                                    stride, 16);        \
298
-    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride,        \
299
-                                         stride, 16);                   \
300
-}                                                                       \
301
-                                                                        \
302
-static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src,   \
303
-                                          ptrdiff_t stride)             \
304
-{                                                                       \
305
-    ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src,           \
306
-                                                    stride, stride, 16);\
307
-}                                                                       \
308
-                                                                        \
309
-static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src,   \
310
-                                          ptrdiff_t stride)             \
311
-{                                                                       \
312
-    uint64_t temp[32];                                                  \
313
-    uint8_t *const half = (uint8_t*) temp;                              \
314
-    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16,      \
315
-                                                    stride, 16);        \
316
-    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + 1, half,            \
317
-                                         stride, stride, 16);           \
318
-}                                                                       \
319
-                                                                        \
320
-static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src,   \
321
-                                          ptrdiff_t stride)             \
322
-{                                                                       \
323
-    uint64_t temp[32];                                                  \
324
-    uint8_t *const half = (uint8_t *) temp;                             \
325
-    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16,      \
326
-                                                    stride);            \
327
-    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride,        \
328
-                                         stride, 16);                   \
329
-}                                                                       \
330
-                                                                        \
331
-static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src,   \
332
-                                          ptrdiff_t stride)             \
333
-{                                                                       \
334
-    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src,           \
335
-                                                    stride, stride);    \
336
-}                                                                       \
337
-                                                                        \
338
-static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src,   \
339
-                                          ptrdiff_t stride)             \
340
-{                                                                       \
341
-    uint64_t temp[32];                                                  \
342
-    uint8_t *const half = (uint8_t *) temp;                             \
343
-    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16,      \
344
-                                                    stride);            \
345
-    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half,         \
346
-                                         stride, stride, 16);           \
347
-}                                                                       \
348
-                                                                        \
349
-static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src,   \
350
-                                          ptrdiff_t stride)             \
351
-{                                                                       \
352
-    uint64_t half[16 * 2 + 17 * 2];                                     \
353
-    uint8_t *const halfH  = (uint8_t *) half + 256;                     \
354
-    uint8_t *const halfHV = (uint8_t *) half;                           \
355
-    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
356
-                                                    stride, 17);        \
357
-    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16,         \
358
-                                         stride, 17);                   \
359
-    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
360
-                                                    16, 16);            \
361
-    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV,            \
362
-                                         stride, 16, 16);               \
363
-}                                                                       \
364
-                                                                        \
365
-static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src,   \
366
-                                          ptrdiff_t stride)             \
367
-{                                                                       \
368
-    uint64_t half[16 * 2 + 17 * 2];                                     \
369
-    uint8_t *const halfH  = (uint8_t *) half + 256;                     \
370
-    uint8_t *const halfHV = (uint8_t *) half;                           \
371
-    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
372
-                                                    stride, 17);        \
373
-    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16,     \
374
-                                         stride, 17);                   \
375
-    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
376
-                                                    16, 16);            \
377
-    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV,            \
378
-                                         stride, 16, 16);               \
379
-}                                                                       \
380
-                                                                        \
381
-static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src,   \
382
-                                          ptrdiff_t stride)             \
383
-{                                                                       \
384
-    uint64_t half[16 * 2 + 17 * 2];                                     \
385
-    uint8_t *const halfH  = (uint8_t *) half + 256;                     \
386
-    uint8_t *const halfHV = (uint8_t *) half;                           \
387
-    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
388
-                                                    stride, 17);        \
389
-    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16,         \
390
-                                         stride, 17);                   \
391
-    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
392
-                                                    16, 16);            \
393
-    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV,       \
394
-                                         stride, 16, 16);               \
395
-}                                                                       \
396
-                                                                        \
397
-static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src,   \
398
-                                          ptrdiff_t stride)             \
399
-{                                                                       \
400
-    uint64_t half[16 * 2 + 17 * 2];                                     \
401
-    uint8_t *const halfH  = (uint8_t *) half + 256;                     \
402
-    uint8_t *const halfHV = (uint8_t *) half;                           \
403
-    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
404
-                                                    stride, 17);        \
405
-    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16,     \
406
-                                         stride, 17);                   \
407
-    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
408
-                                                    16, 16);            \
409
-    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV,       \
410
-                                         stride, 16, 16);               \
411
-}                                                                       \
412
-                                                                        \
413
-static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src,   \
414
-                                          ptrdiff_t stride)             \
415
-{                                                                       \
416
-    uint64_t half[16 * 2 + 17 * 2];                                     \
417
-    uint8_t *const halfH  = (uint8_t *) half + 256;                     \
418
-    uint8_t *const halfHV = (uint8_t *) half;                           \
419
-    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
420
-                                                    stride, 17);        \
421
-    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
422
-                                                    16, 16);            \
423
-    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV,            \
424
-                                         stride, 16, 16);               \
425
-}                                                                       \
426
-                                                                        \
427
-static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src,   \
428
-                                          ptrdiff_t stride)             \
429
-{                                                                       \
430
-    uint64_t half[16 * 2 + 17 * 2];                                     \
431
-    uint8_t *const halfH  = (uint8_t *) half + 256;                     \
432
-    uint8_t *const halfHV = (uint8_t *) half;                           \
433
-    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
434
-                                                    stride, 17);        \
435
-    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
436
-                                                    16, 16);            \
437
-    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV,       \
438
-                                         stride, 16, 16);               \
439
-}                                                                       \
440
-                                                                        \
441
-static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src,   \
442
-                                          ptrdiff_t stride)             \
443
-{                                                                       \
444
-    uint64_t half[17 * 2];                                              \
445
-    uint8_t *const halfH = (uint8_t *) half;                            \
446
-    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
447
-                                                    stride, 17);        \
448
-    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16,         \
449
-                                         stride, 17);                   \
450
-    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH,         \
451
-                                                    stride, 16);        \
452
-}                                                                       \
453
-                                                                        \
454
-static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src,   \
455
-                                          ptrdiff_t stride)             \
456
-{                                                                       \
457
-    uint64_t half[17 * 2];                                              \
458
-    uint8_t *const halfH = (uint8_t *) half;                            \
459
-    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
460
-                                                    stride, 17);        \
461
-    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16,     \
462
-                                         stride, 17);                   \
463
-    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH,         \
464
-                                                    stride, 16);        \
465
-}                                                                       \
466
-                                                                        \
467
-static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src,   \
468
-                                          ptrdiff_t stride)             \
469
-{                                                                       \
470
-    uint64_t half[17 * 2];                                              \
471
-    uint8_t *const halfH = (uint8_t *) half;                            \
472
-    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
473
-                                                    stride, 17);        \
474
-    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH,         \
475
-                                                    stride, 16);        \
476
-}
477
-
478
-QPEL_OP(put_,        _,        mmxext)
479
-QPEL_OP(avg_,        _,        mmxext)
480
-QPEL_OP(put_no_rnd_, _no_rnd_, mmxext)
481
-
482
-#endif /* HAVE_YASM */
483
-
484
-#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX)                          \
485
-do {                                                                         \
486
-    c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
487
-    c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
488
-    c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
489
-    c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
490
-    c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
491
-    c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
492
-    c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
493
-    c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
494
-    c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
495
-    c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
496
-    c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
497
-    c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
498
-    c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
499
-    c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
500
-    c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
501
-    c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \
502
-} while (0)
503
-
504 92
 static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
505 93
                                      int cpu_flags, unsigned high_bit_depth)
506 94
 {
... ...
@@ -550,14 +94,6 @@ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
550 550
 #endif /* HAVE_MMXEXT_INLINE */
551 551
 
552 552
 #if HAVE_MMXEXT_EXTERNAL
553
-    SET_QPEL_FUNCS(avg_qpel,        0, 16, mmxext, );
554
-    SET_QPEL_FUNCS(avg_qpel,        1,  8, mmxext, );
555
-
556
-    SET_QPEL_FUNCS(put_qpel,        0, 16, mmxext, );
557
-    SET_QPEL_FUNCS(put_qpel,        1,  8, mmxext, );
558
-    SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, );
559
-    SET_QPEL_FUNCS(put_no_rnd_qpel, 1,  8, mmxext, );
560
-
561 553
     c->scalarproduct_int16          = ff_scalarproduct_int16_mmxext;
562 554
 #endif /* HAVE_MMXEXT_EXTERNAL */
563 555
 }
564 556
deleted file mode 100644
... ...
@@ -1,558 +0,0 @@
1
-;******************************************************************************
2
-;* mpeg4 qpel
3
-;* Copyright (c) 2008 Loren Merritt
4
-;*
5
-;* This file is part of Libav.
6
-;*
7
-;* Libav is free software; you can redistribute it and/or
8
-;* modify it under the terms of the GNU Lesser General Public
9
-;* License as published by the Free Software Foundation; either
10
-;* version 2.1 of the License, or (at your option) any later version.
11
-;*
12
-;* Libav is distributed in the hope that it will be useful,
13
-;* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
-;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
-;* Lesser General Public License for more details.
16
-;*
17
-;* You should have received a copy of the GNU Lesser General Public
18
-;* License along with Libav; if not, write to the Free Software
19
-;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
-;******************************************************************************
21
-
22
-%include "libavutil/x86/x86util.asm"
23
-
24
-SECTION_RODATA
25
-cextern pb_1
26
-cextern pw_3
27
-cextern pw_15
28
-cextern pw_16
29
-cextern pw_20
30
-
31
-
32
-SECTION_TEXT
33
-
34
-; void ff_put_no_rnd_pixels8_l2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
35
-%macro PUT_NO_RND_PIXELS8_L2 0
36
-cglobal put_no_rnd_pixels8_l2, 6,6
37
-    movsxdifnidn r4, r4d
38
-    movsxdifnidn r3, r3d
39
-    pcmpeqb      m6, m6
40
-    test        r5d, 1
41
-    je .loop
42
-    mova         m0, [r1]
43
-    mova         m1, [r2]
44
-    add          r1, r4
45
-    add          r2, 8
46
-    pxor         m0, m6
47
-    pxor         m1, m6
48
-    PAVGB        m0, m1
49
-    pxor         m0, m6
50
-    mova       [r0], m0
51
-    add          r0, r3
52
-    dec r5d
53
-.loop:
54
-    mova         m0, [r1]
55
-    add          r1, r4
56
-    mova         m1, [r1]
57
-    add          r1, r4
58
-    mova         m2, [r2]
59
-    mova         m3, [r2+8]
60
-    pxor         m0, m6
61
-    pxor         m1, m6
62
-    pxor         m2, m6
63
-    pxor         m3, m6
64
-    PAVGB        m0, m2
65
-    PAVGB        m1, m3
66
-    pxor         m0, m6
67
-    pxor         m1, m6
68
-    mova       [r0], m0
69
-    add          r0, r3
70
-    mova       [r0], m1
71
-    add          r0, r3
72
-    mova         m0, [r1]
73
-    add          r1, r4
74
-    mova         m1, [r1]
75
-    add          r1, r4
76
-    mova         m2, [r2+16]
77
-    mova         m3, [r2+24]
78
-    pxor         m0, m6
79
-    pxor         m1, m6
80
-    pxor         m2, m6
81
-    pxor         m3, m6
82
-    PAVGB        m0, m2
83
-    PAVGB        m1, m3
84
-    pxor         m0, m6
85
-    pxor         m1, m6
86
-    mova       [r0], m0
87
-    add          r0, r3
88
-    mova       [r0], m1
89
-    add          r0, r3
90
-    add          r2, 32
91
-    sub         r5d, 4
92
-    jne .loop
93
-    REP_RET
94
-%endmacro
95
-
96
-INIT_MMX mmxext
97
-PUT_NO_RND_PIXELS8_L2
98
-
99
-
100
-; void ff_put_no_rnd_pixels16_l2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
101
-%macro PUT_NO_RND_PIXELS16_l2 0
102
-cglobal put_no_rnd_pixels16_l2, 6,6
103
-    movsxdifnidn r3, r3d
104
-    movsxdifnidn r4, r4d
105
-    pcmpeqb      m6, m6
106
-    test        r5d, 1
107
-    je .loop
108
-    mova         m0, [r1]
109
-    mova         m1, [r1+8]
110
-    mova         m2, [r2]
111
-    mova         m3, [r2+8]
112
-    pxor         m0, m6
113
-    pxor         m1, m6
114
-    pxor         m2, m6
115
-    pxor         m3, m6
116
-    PAVGB        m0, m2
117
-    PAVGB        m1, m3
118
-    pxor         m0, m6
119
-    pxor         m1, m6
120
-    add          r1, r4
121
-    add          r2, 16
122
-    mova       [r0], m0
123
-    mova     [r0+8], m1
124
-    add          r0, r3
125
-    dec r5d
126
-.loop:
127
-    mova         m0, [r1]
128
-    mova         m1, [r1+8]
129
-    add          r1, r4
130
-    mova         m2, [r2]
131
-    mova         m3, [r2+8]
132
-    pxor         m0, m6
133
-    pxor         m1, m6
134
-    pxor         m2, m6
135
-    pxor         m3, m6
136
-    PAVGB        m0, m2
137
-    PAVGB        m1, m3
138
-    pxor         m0, m6
139
-    pxor         m1, m6
140
-    mova       [r0], m0
141
-    mova     [r0+8], m1
142
-    add          r0, r3
143
-    mova         m0, [r1]
144
-    mova         m1, [r1+8]
145
-    add          r1, r4
146
-    mova         m2, [r2+16]
147
-    mova         m3, [r2+24]
148
-    pxor         m0, m6
149
-    pxor         m1, m6
150
-    pxor         m2, m6
151
-    pxor         m3, m6
152
-    PAVGB        m0, m2
153
-    PAVGB        m1, m3
154
-    pxor         m0, m6
155
-    pxor         m1, m6
156
-    mova       [r0], m0
157
-    mova     [r0+8], m1
158
-    add          r0, r3
159
-    add          r2, 32
160
-    sub         r5d, 2
161
-    jne .loop
162
-    REP_RET
163
-%endmacro
164
-
165
-INIT_MMX mmxext
166
-PUT_NO_RND_PIXELS16_l2
167
-INIT_MMX 3dnow
168
-PUT_NO_RND_PIXELS16_l2
169
-
170
-%macro MPEG4_QPEL16_H_LOWPASS 1
171
-cglobal %1_mpeg4_qpel16_h_lowpass, 5, 5, 0, 16
172
-    movsxdifnidn r2, r2d
173
-    movsxdifnidn r3, r3d
174
-    pxor         m7, m7
175
-.loop:
176
-    mova         m0, [r1]
177
-    mova         m1, m0
178
-    mova         m2, m0
179
-    punpcklbw    m0, m7
180
-    punpckhbw    m1, m7
181
-    pshufw       m5, m0, 0x90
182
-    pshufw       m6, m0, 0x41
183
-    mova         m3, m2
184
-    mova         m4, m2
185
-    psllq        m2, 8
186
-    psllq        m3, 16
187
-    psllq        m4, 24
188
-    punpckhbw    m2, m7
189
-    punpckhbw    m3, m7
190
-    punpckhbw    m4, m7
191
-    paddw        m5, m3
192
-    paddw        m6, m2
193
-    paddw        m5, m5
194
-    psubw        m6, m5
195
-    pshufw       m5, m0, 6
196
-    pmullw       m6, [pw_3]
197
-    paddw        m0, m4
198
-    paddw        m5, m1
199
-    pmullw       m0, [pw_20]
200
-    psubw        m0, m5
201
-    paddw        m6, [PW_ROUND]
202
-    paddw        m0, m6
203
-    psraw        m0, 5
204
-    mova    [rsp+8], m0
205
-    mova         m0, [r1+5]
206
-    mova         m5, m0
207
-    mova         m6, m0
208
-    psrlq        m0, 8
209
-    psrlq        m5, 16
210
-    punpcklbw    m0, m7
211
-    punpcklbw    m5, m7
212
-    paddw        m2, m0
213
-    paddw        m3, m5
214
-    paddw        m2, m2
215
-    psubw        m3, m2
216
-    mova         m2, m6
217
-    psrlq        m6, 24
218
-    punpcklbw    m2, m7
219
-    punpcklbw    m6, m7
220
-    pmullw       m3, [pw_3]
221
-    paddw        m1, m2
222
-    paddw        m4, m6
223
-    pmullw       m1, [pw_20]
224
-    psubw        m3, m4
225
-    paddw        m1, [PW_ROUND]
226
-    paddw        m3, m1
227
-    psraw        m3, 5
228
-    mova         m1, [rsp+8]
229
-    packuswb     m1, m3
230
-    OP_MOV     [r0], m1, m4
231
-    mova         m1, [r1+9]
232
-    mova         m4, m1
233
-    mova         m3, m1
234
-    psrlq        m1, 8
235
-    psrlq        m4, 16
236
-    punpcklbw    m1, m7
237
-    punpcklbw    m4, m7
238
-    paddw        m5, m1
239
-    paddw        m0, m4
240
-    paddw        m5, m5
241
-    psubw        m0, m5
242
-    mova         m5, m3
243
-    psrlq        m3, 24
244
-    pmullw       m0, [pw_3]
245
-    punpcklbw    m3, m7
246
-    paddw        m2, m3
247
-    psubw        m0, m2
248
-    mova         m2, m5
249
-    punpcklbw    m2, m7
250
-    punpckhbw    m5, m7
251
-    paddw        m6, m2
252
-    pmullw       m6, [pw_20]
253
-    paddw        m0, [PW_ROUND]
254
-    paddw        m0, m6
255
-    psraw        m0, 5
256
-    paddw        m3, m5
257
-    pshufw       m6, m5, 0xf9
258
-    paddw        m6, m4
259
-    pshufw       m4, m5, 0xbe
260
-    pshufw       m5, m5, 0x6f
261
-    paddw        m4, m1
262
-    paddw        m5, m2
263
-    paddw        m6, m6
264
-    psubw        m4, m6
265
-    pmullw       m3, [pw_20]
266
-    pmullw       m4, [pw_3]
267
-    psubw        m3, m5
268
-    paddw        m4, [PW_ROUND]
269
-    paddw        m4, m3
270
-    psraw        m4, 5
271
-    packuswb     m0, m4
272
-    OP_MOV   [r0+8], m0, m4
273
-    add          r1, r3
274
-    add          r0, r2
275
-    dec r4d
276
-    jne .loop
277
-    REP_RET
278
-%endmacro
279
-
280
-%macro PUT_OP 2-3
281
-    mova %1, %2
282
-%endmacro
283
-
284
-%macro AVG_OP 2-3
285
-    mova  %3, %1
286
-    pavgb %2, %3
287
-    mova  %1, %2
288
-%endmacro
289
-
290
-INIT_MMX mmxext
291
-%define PW_ROUND pw_16
292
-%define OP_MOV PUT_OP
293
-MPEG4_QPEL16_H_LOWPASS put
294
-%define PW_ROUND pw_16
295
-%define OP_MOV AVG_OP
296
-MPEG4_QPEL16_H_LOWPASS avg
297
-%define PW_ROUND pw_15
298
-%define OP_MOV PUT_OP
299
-MPEG4_QPEL16_H_LOWPASS put_no_rnd
300
-
301
-
302
-
303
-%macro MPEG4_QPEL8_H_LOWPASS 1
304
-cglobal %1_mpeg4_qpel8_h_lowpass, 5, 5, 0, 8
305
-    movsxdifnidn r2, r2d
306
-    movsxdifnidn r3, r3d
307
-    pxor         m7, m7
308
-.loop:
309
-    mova         m0, [r1]
310
-    mova         m1, m0
311
-    mova         m2, m0
312
-    punpcklbw    m0, m7
313
-    punpckhbw    m1, m7
314
-    pshufw       m5, m0, 0x90
315
-    pshufw       m6, m0, 0x41
316
-    mova         m3, m2
317
-    mova         m4, m2
318
-    psllq        m2, 8
319
-    psllq        m3, 16
320
-    psllq        m4, 24
321
-    punpckhbw    m2, m7
322
-    punpckhbw    m3, m7
323
-    punpckhbw    m4, m7
324
-    paddw        m5, m3
325
-    paddw        m6, m2
326
-    paddw        m5, m5
327
-    psubw        m6, m5
328
-    pshufw       m5, m0, 0x6
329
-    pmullw       m6, [pw_3]
330
-    paddw        m0, m4
331
-    paddw        m5, m1
332
-    pmullw       m0, [pw_20]
333
-    psubw        m0, m5
334
-    paddw        m6, [PW_ROUND]
335
-    paddw        m0, m6
336
-    psraw        m0, 5
337
-    movh         m5, [r1+5]
338
-    punpcklbw    m5, m7
339
-    pshufw       m6, m5, 0xf9
340
-    paddw        m1, m5
341
-    paddw        m2, m6
342
-    pshufw       m6, m5, 0xbe
343
-    pshufw       m5, m5, 0x6f
344
-    paddw        m3, m6
345
-    paddw        m4, m5
346
-    paddw        m2, m2
347
-    psubw        m3, m2
348
-    pmullw       m1, [pw_20]
349
-    pmullw       m3, [pw_3]
350
-    psubw        m3, m4
351
-    paddw        m1, [PW_ROUND]
352
-    paddw        m3, m1
353
-    psraw        m3, 5
354
-    packuswb     m0, m3
355
-    OP_MOV     [r0], m0, m4
356
-    add          r1, r3
357
-    add          r0, r2
358
-    dec r4d
359
-    jne .loop
360
-    REP_RET
361
-%endmacro
362
-
363
-INIT_MMX mmxext
364
-%define PW_ROUND pw_16
365
-%define OP_MOV PUT_OP
366
-MPEG4_QPEL8_H_LOWPASS put
367
-%define PW_ROUND pw_16
368
-%define OP_MOV AVG_OP
369
-MPEG4_QPEL8_H_LOWPASS avg
370
-%define PW_ROUND pw_15
371
-%define OP_MOV PUT_OP
372
-MPEG4_QPEL8_H_LOWPASS put_no_rnd
373
-
374
-
375
-
376
-%macro QPEL_V_LOW 5
377
-    paddw      m0, m1
378
-    mova       m4, [pw_20]
379
-    pmullw     m4, m0
380
-    mova       m0, %4
381
-    mova       m5, %1
382
-    paddw      m5, m0
383
-    psubw      m4, m5
384
-    mova       m5, %2
385
-    mova       m6, %3
386
-    paddw      m5, m3
387
-    paddw      m6, m2
388
-    paddw      m6, m6
389
-    psubw      m5, m6
390
-    pmullw     m5, [pw_3]
391
-    paddw      m4, [PW_ROUND]
392
-    paddw      m5, m4
393
-    psraw      m5, 5
394
-    packuswb   m5, m5
395
-    OP_MOV     %5, m5, m7
396
-    SWAP 0,1,2,3
397
-%endmacro
398
-
399
-%macro MPEG4_QPEL16_V_LOWPASS 1
400
-cglobal %1_mpeg4_qpel16_v_lowpass, 4, 6, 0, 544
401
-    movsxdifnidn r2, r2d
402
-    movsxdifnidn r3, r3d
403
-
404
-    mov         r4d, 17
405
-    mov          r5, rsp
406
-    pxor         m7, m7
407
-.looph:
408
-    mova         m0, [r1]
409
-    mova         m1, [r1]
410
-    mova         m2, [r1+8]
411
-    mova         m3, [r1+8]
412
-    punpcklbw    m0, m7
413
-    punpckhbw    m1, m7
414
-    punpcklbw    m2, m7
415
-    punpckhbw    m3, m7
416
-    mova       [r5], m0
417
-    mova  [r5+0x88], m1
418
-    mova [r5+0x110], m2
419
-    mova [r5+0x198], m3
420
-    add          r5, 8
421
-    add          r1, r3
422
-    dec r4d
423
-    jne .looph
424
-
425
-
426
-    ; NOTE: r1 CHANGES VALUES: r1 -> 4 - 14*dstStride
427
-    mov         r4d, 4
428
-    mov          r1, 4
429
-    neg          r2
430
-    lea          r1, [r1+r2*8]
431
-    lea          r1, [r1+r2*4]
432
-    lea          r1, [r1+r2*2]
433
-    neg          r2
434
-    mov          r5, rsp
435
-.loopv:
436
-    pxor         m7, m7
437
-    mova         m0, [r5+ 0x0]
438
-    mova         m1, [r5+ 0x8]
439
-    mova         m2, [r5+0x10]
440
-    mova         m3, [r5+0x18]
441
-    QPEL_V_LOW [r5+0x10], [r5+ 0x8], [r5+ 0x0], [r5+0x20], [r0]
442
-    QPEL_V_LOW [r5+ 0x8], [r5+ 0x0], [r5+ 0x0], [r5+0x28], [r0+r2]
443
-    lea    r0, [r0+r2*2]
444
-    QPEL_V_LOW [r5+ 0x0], [r5+ 0x0], [r5+ 0x8], [r5+0x30], [r0]
445
-    QPEL_V_LOW [r5+ 0x0], [r5+ 0x8], [r5+0x10], [r5+0x38], [r0+r2]
446
-    lea    r0, [r0+r2*2]
447
-    QPEL_V_LOW [r5+ 0x8], [r5+0x10], [r5+0x18], [r5+0x40], [r0]
448
-    QPEL_V_LOW [r5+0x10], [r5+0x18], [r5+0x20], [r5+0x48], [r0+r2]
449
-    lea    r0, [r0+r2*2]
450
-    QPEL_V_LOW [r5+0x18], [r5+0x20], [r5+0x28], [r5+0x50], [r0]
451
-    QPEL_V_LOW [r5+0x20], [r5+0x28], [r5+0x30], [r5+0x58], [r0+r2]
452
-    lea    r0, [r0+r2*2]
453
-    QPEL_V_LOW [r5+0x28], [r5+0x30], [r5+0x38], [r5+0x60], [r0]
454
-    QPEL_V_LOW [r5+0x30], [r5+0x38], [r5+0x40], [r5+0x68], [r0+r2]
455
-    lea    r0, [r0+r2*2]
456
-    QPEL_V_LOW [r5+0x38], [r5+0x40], [r5+0x48], [r5+0x70], [r0]
457
-    QPEL_V_LOW [r5+0x40], [r5+0x48], [r5+0x50], [r5+0x78], [r0+r2]
458
-    lea    r0, [r0+r2*2]
459
-    QPEL_V_LOW [r5+0x48], [r5+0x50], [r5+0x58], [r5+0x80], [r0]
460
-    QPEL_V_LOW [r5+0x50], [r5+0x58], [r5+0x60], [r5+0x80], [r0+r2]
461
-    lea    r0, [r0+r2*2]
462
-    QPEL_V_LOW [r5+0x58], [r5+0x60], [r5+0x68], [r5+0x78], [r0]
463
-    QPEL_V_LOW [r5+0x60], [r5+0x68], [r5+0x70], [r5+0x70], [r0+r2]
464
-
465
-    add    r5, 0x88
466
-    add    r0, r1
467
-    dec r4d
468
-    jne .loopv
469
-    REP_RET
470
-%endmacro
471
-
472
-%macro PUT_OPH 2-3
473
-    movh %1, %2
474
-%endmacro
475
-
476
-%macro AVG_OPH 2-3
477
-    movh  %3, %1
478
-    pavgb %2, %3
479
-    movh  %1, %2
480
-%endmacro
481
-
482
-INIT_MMX mmxext
483
-%define PW_ROUND pw_16
484
-%define OP_MOV PUT_OPH
485
-MPEG4_QPEL16_V_LOWPASS put
486
-%define PW_ROUND pw_16
487
-%define OP_MOV AVG_OPH
488
-MPEG4_QPEL16_V_LOWPASS avg
489
-%define PW_ROUND pw_15
490
-%define OP_MOV PUT_OPH
491
-MPEG4_QPEL16_V_LOWPASS put_no_rnd
492
-
493
-
494
-
495
-%macro MPEG4_QPEL8_V_LOWPASS 1
496
-cglobal %1_mpeg4_qpel8_v_lowpass, 4, 6, 0, 288
497
-    movsxdifnidn r2, r2d
498
-    movsxdifnidn r3, r3d
499
-
500
-    mov         r4d, 9
501
-    mov          r5, rsp
502
-    pxor         m7, m7
503
-.looph:
504
-    mova         m0, [r1]
505
-    mova         m1, [r1]
506
-    punpcklbw    m0, m7
507
-    punpckhbw    m1, m7
508
-    mova       [r5], m0
509
-    mova  [r5+0x48], m1
510
-    add          r5, 8
511
-    add          r1, r3
512
-    dec r4d
513
-    jne .looph
514
-
515
-
516
-    ; NOTE: r1 CHANGES VALUES: r1 -> 4 - 6*dstStride
517
-    mov         r4d, 2
518
-    mov          r1, 4
519
-    neg          r2
520
-    lea          r1, [r1+r2*4]
521
-    lea          r1, [r1+r2*2]
522
-    neg          r2
523
-    mov          r5, rsp
524
-.loopv:
525
-    pxor         m7, m7
526
-    mova         m0, [r5+ 0x0]
527
-    mova         m1, [r5+ 0x8]
528
-    mova         m2, [r5+0x10]
529
-    mova         m3, [r5+0x18]
530
-    QPEL_V_LOW [r5+0x10], [r5+ 0x8], [r5+ 0x0], [r5+0x20], [r0]
531
-    QPEL_V_LOW [r5+ 0x8], [r5+ 0x0], [r5+ 0x0], [r5+0x28], [r0+r2]
532
-    lea    r0, [r0+r2*2]
533
-    QPEL_V_LOW [r5+ 0x0], [r5+ 0x0], [r5+ 0x8], [r5+0x30], [r0]
534
-    QPEL_V_LOW [r5+ 0x0], [r5+ 0x8], [r5+0x10], [r5+0x38], [r0+r2]
535
-    lea    r0, [r0+r2*2]
536
-    QPEL_V_LOW [r5+ 0x8], [r5+0x10], [r5+0x18], [r5+0x40], [r0]
537
-    QPEL_V_LOW [r5+0x10], [r5+0x18], [r5+0x20], [r5+0x40], [r0+r2]
538
-    lea    r0, [r0+r2*2]
539
-    QPEL_V_LOW [r5+0x18], [r5+0x20], [r5+0x28], [r5+0x38], [r0]
540
-    QPEL_V_LOW [r5+0x20], [r5+0x28], [r5+0x30], [r5+0x30], [r0+r2]
541
-
542
-    add    r5, 0x48
543
-    add    r0, r1
544
-    dec r4d
545
-    jne .loopv
546
-    REP_RET
547
-%endmacro
548
-
549
-INIT_MMX mmxext
550
-%define PW_ROUND pw_16
551
-%define OP_MOV PUT_OPH
552
-MPEG4_QPEL8_V_LOWPASS put
553
-%define PW_ROUND pw_16
554
-%define OP_MOV AVG_OPH
555
-MPEG4_QPEL8_V_LOWPASS avg
556
-%define PW_ROUND pw_15
557
-%define OP_MOV PUT_OPH
558
-MPEG4_QPEL8_V_LOWPASS put_no_rnd
559 1
new file mode 100644
... ...
@@ -0,0 +1,559 @@
0
+;******************************************************************************
1
+;* quarterpel DSP functions
2
+;*
3
+;* Copyright (c) 2008 Loren Merritt
4
+;*
5
+;* This file is part of Libav.
6
+;*
7
+;* Libav is free software; you can redistribute it and/or
8
+;* modify it under the terms of the GNU Lesser General Public
9
+;* License as published by the Free Software Foundation; either
10
+;* version 2.1 of the License, or (at your option) any later version.
11
+;*
12
+;* Libav is distributed in the hope that it will be useful,
13
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
+;* Lesser General Public License for more details.
16
+;*
17
+;* You should have received a copy of the GNU Lesser General Public
18
+;* License along with Libav; if not, write to the Free Software
19
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+;******************************************************************************
21
+
22
+%include "libavutil/x86/x86util.asm"
23
+
24
+SECTION_RODATA
25
+cextern pb_1
26
+cextern pw_3
27
+cextern pw_15
28
+cextern pw_16
29
+cextern pw_20
30
+
31
+
32
+SECTION_TEXT
33
+
34
+; void ff_put_no_rnd_pixels8_l2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
35
+%macro PUT_NO_RND_PIXELS8_L2 0
36
+cglobal put_no_rnd_pixels8_l2, 6,6
37
+    movsxdifnidn r4, r4d
38
+    movsxdifnidn r3, r3d
39
+    pcmpeqb      m6, m6
40
+    test        r5d, 1
41
+    je .loop
42
+    mova         m0, [r1]
43
+    mova         m1, [r2]
44
+    add          r1, r4
45
+    add          r2, 8
46
+    pxor         m0, m6
47
+    pxor         m1, m6
48
+    PAVGB        m0, m1
49
+    pxor         m0, m6
50
+    mova       [r0], m0
51
+    add          r0, r3
52
+    dec r5d
53
+.loop:
54
+    mova         m0, [r1]
55
+    add          r1, r4
56
+    mova         m1, [r1]
57
+    add          r1, r4
58
+    mova         m2, [r2]
59
+    mova         m3, [r2+8]
60
+    pxor         m0, m6
61
+    pxor         m1, m6
62
+    pxor         m2, m6
63
+    pxor         m3, m6
64
+    PAVGB        m0, m2
65
+    PAVGB        m1, m3
66
+    pxor         m0, m6
67
+    pxor         m1, m6
68
+    mova       [r0], m0
69
+    add          r0, r3
70
+    mova       [r0], m1
71
+    add          r0, r3
72
+    mova         m0, [r1]
73
+    add          r1, r4
74
+    mova         m1, [r1]
75
+    add          r1, r4
76
+    mova         m2, [r2+16]
77
+    mova         m3, [r2+24]
78
+    pxor         m0, m6
79
+    pxor         m1, m6
80
+    pxor         m2, m6
81
+    pxor         m3, m6
82
+    PAVGB        m0, m2
83
+    PAVGB        m1, m3
84
+    pxor         m0, m6
85
+    pxor         m1, m6
86
+    mova       [r0], m0
87
+    add          r0, r3
88
+    mova       [r0], m1
89
+    add          r0, r3
90
+    add          r2, 32
91
+    sub         r5d, 4
92
+    jne .loop
93
+    REP_RET
94
+%endmacro
95
+
96
+INIT_MMX mmxext
97
+PUT_NO_RND_PIXELS8_L2
98
+
99
+
100
+; void ff_put_no_rnd_pixels16_l2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
101
+%macro PUT_NO_RND_PIXELS16_l2 0
102
+cglobal put_no_rnd_pixels16_l2, 6,6
103
+    movsxdifnidn r3, r3d
104
+    movsxdifnidn r4, r4d
105
+    pcmpeqb      m6, m6
106
+    test        r5d, 1
107
+    je .loop
108
+    mova         m0, [r1]
109
+    mova         m1, [r1+8]
110
+    mova         m2, [r2]
111
+    mova         m3, [r2+8]
112
+    pxor         m0, m6
113
+    pxor         m1, m6
114
+    pxor         m2, m6
115
+    pxor         m3, m6
116
+    PAVGB        m0, m2
117
+    PAVGB        m1, m3
118
+    pxor         m0, m6
119
+    pxor         m1, m6
120
+    add          r1, r4
121
+    add          r2, 16
122
+    mova       [r0], m0
123
+    mova     [r0+8], m1
124
+    add          r0, r3
125
+    dec r5d
126
+.loop:
127
+    mova         m0, [r1]
128
+    mova         m1, [r1+8]
129
+    add          r1, r4
130
+    mova         m2, [r2]
131
+    mova         m3, [r2+8]
132
+    pxor         m0, m6
133
+    pxor         m1, m6
134
+    pxor         m2, m6
135
+    pxor         m3, m6
136
+    PAVGB        m0, m2
137
+    PAVGB        m1, m3
138
+    pxor         m0, m6
139
+    pxor         m1, m6
140
+    mova       [r0], m0
141
+    mova     [r0+8], m1
142
+    add          r0, r3
143
+    mova         m0, [r1]
144
+    mova         m1, [r1+8]
145
+    add          r1, r4
146
+    mova         m2, [r2+16]
147
+    mova         m3, [r2+24]
148
+    pxor         m0, m6
149
+    pxor         m1, m6
150
+    pxor         m2, m6
151
+    pxor         m3, m6
152
+    PAVGB        m0, m2
153
+    PAVGB        m1, m3
154
+    pxor         m0, m6
155
+    pxor         m1, m6
156
+    mova       [r0], m0
157
+    mova     [r0+8], m1
158
+    add          r0, r3
159
+    add          r2, 32
160
+    sub         r5d, 2
161
+    jne .loop
162
+    REP_RET
163
+%endmacro
164
+
165
+INIT_MMX mmxext
166
+PUT_NO_RND_PIXELS16_l2
167
+INIT_MMX 3dnow
168
+PUT_NO_RND_PIXELS16_l2
169
+
170
+%macro MPEG4_QPEL16_H_LOWPASS 1
171
+cglobal %1_mpeg4_qpel16_h_lowpass, 5, 5, 0, 16
172
+    movsxdifnidn r2, r2d
173
+    movsxdifnidn r3, r3d
174
+    pxor         m7, m7
175
+.loop:
176
+    mova         m0, [r1]
177
+    mova         m1, m0
178
+    mova         m2, m0
179
+    punpcklbw    m0, m7
180
+    punpckhbw    m1, m7
181
+    pshufw       m5, m0, 0x90
182
+    pshufw       m6, m0, 0x41
183
+    mova         m3, m2
184
+    mova         m4, m2
185
+    psllq        m2, 8
186
+    psllq        m3, 16
187
+    psllq        m4, 24
188
+    punpckhbw    m2, m7
189
+    punpckhbw    m3, m7
190
+    punpckhbw    m4, m7
191
+    paddw        m5, m3
192
+    paddw        m6, m2
193
+    paddw        m5, m5
194
+    psubw        m6, m5
195
+    pshufw       m5, m0, 6
196
+    pmullw       m6, [pw_3]
197
+    paddw        m0, m4
198
+    paddw        m5, m1
199
+    pmullw       m0, [pw_20]
200
+    psubw        m0, m5
201
+    paddw        m6, [PW_ROUND]
202
+    paddw        m0, m6
203
+    psraw        m0, 5
204
+    mova    [rsp+8], m0
205
+    mova         m0, [r1+5]
206
+    mova         m5, m0
207
+    mova         m6, m0
208
+    psrlq        m0, 8
209
+    psrlq        m5, 16
210
+    punpcklbw    m0, m7
211
+    punpcklbw    m5, m7
212
+    paddw        m2, m0
213
+    paddw        m3, m5
214
+    paddw        m2, m2
215
+    psubw        m3, m2
216
+    mova         m2, m6
217
+    psrlq        m6, 24
218
+    punpcklbw    m2, m7
219
+    punpcklbw    m6, m7
220
+    pmullw       m3, [pw_3]
221
+    paddw        m1, m2
222
+    paddw        m4, m6
223
+    pmullw       m1, [pw_20]
224
+    psubw        m3, m4
225
+    paddw        m1, [PW_ROUND]
226
+    paddw        m3, m1
227
+    psraw        m3, 5
228
+    mova         m1, [rsp+8]
229
+    packuswb     m1, m3
230
+    OP_MOV     [r0], m1, m4
231
+    mova         m1, [r1+9]
232
+    mova         m4, m1
233
+    mova         m3, m1
234
+    psrlq        m1, 8
235
+    psrlq        m4, 16
236
+    punpcklbw    m1, m7
237
+    punpcklbw    m4, m7
238
+    paddw        m5, m1
239
+    paddw        m0, m4
240
+    paddw        m5, m5
241
+    psubw        m0, m5
242
+    mova         m5, m3
243
+    psrlq        m3, 24
244
+    pmullw       m0, [pw_3]
245
+    punpcklbw    m3, m7
246
+    paddw        m2, m3
247
+    psubw        m0, m2
248
+    mova         m2, m5
249
+    punpcklbw    m2, m7
250
+    punpckhbw    m5, m7
251
+    paddw        m6, m2
252
+    pmullw       m6, [pw_20]
253
+    paddw        m0, [PW_ROUND]
254
+    paddw        m0, m6
255
+    psraw        m0, 5
256
+    paddw        m3, m5
257
+    pshufw       m6, m5, 0xf9
258
+    paddw        m6, m4
259
+    pshufw       m4, m5, 0xbe
260
+    pshufw       m5, m5, 0x6f
261
+    paddw        m4, m1
262
+    paddw        m5, m2
263
+    paddw        m6, m6
264
+    psubw        m4, m6
265
+    pmullw       m3, [pw_20]
266
+    pmullw       m4, [pw_3]
267
+    psubw        m3, m5
268
+    paddw        m4, [PW_ROUND]
269
+    paddw        m4, m3
270
+    psraw        m4, 5
271
+    packuswb     m0, m4
272
+    OP_MOV   [r0+8], m0, m4
273
+    add          r1, r3
274
+    add          r0, r2
275
+    dec r4d
276
+    jne .loop
277
+    REP_RET
278
+%endmacro
279
+
280
+%macro PUT_OP 2-3
281
+    mova %1, %2
282
+%endmacro
283
+
284
+%macro AVG_OP 2-3
285
+    mova  %3, %1
286
+    pavgb %2, %3
287
+    mova  %1, %2
288
+%endmacro
289
+
290
+INIT_MMX mmxext
291
+%define PW_ROUND pw_16
292
+%define OP_MOV PUT_OP
293
+MPEG4_QPEL16_H_LOWPASS put
294
+%define PW_ROUND pw_16
295
+%define OP_MOV AVG_OP
296
+MPEG4_QPEL16_H_LOWPASS avg
297
+%define PW_ROUND pw_15
298
+%define OP_MOV PUT_OP
299
+MPEG4_QPEL16_H_LOWPASS put_no_rnd
300
+
301
+
302
+
303
+%macro MPEG4_QPEL8_H_LOWPASS 1
304
+cglobal %1_mpeg4_qpel8_h_lowpass, 5, 5, 0, 8
305
+    movsxdifnidn r2, r2d
306
+    movsxdifnidn r3, r3d
307
+    pxor         m7, m7
308
+.loop:
309
+    mova         m0, [r1]
310
+    mova         m1, m0
311
+    mova         m2, m0
312
+    punpcklbw    m0, m7
313
+    punpckhbw    m1, m7
314
+    pshufw       m5, m0, 0x90
315
+    pshufw       m6, m0, 0x41
316
+    mova         m3, m2
317
+    mova         m4, m2
318
+    psllq        m2, 8
319
+    psllq        m3, 16
320
+    psllq        m4, 24
321
+    punpckhbw    m2, m7
322
+    punpckhbw    m3, m7
323
+    punpckhbw    m4, m7
324
+    paddw        m5, m3
325
+    paddw        m6, m2
326
+    paddw        m5, m5
327
+    psubw        m6, m5
328
+    pshufw       m5, m0, 0x6
329
+    pmullw       m6, [pw_3]
330
+    paddw        m0, m4
331
+    paddw        m5, m1
332
+    pmullw       m0, [pw_20]
333
+    psubw        m0, m5
334
+    paddw        m6, [PW_ROUND]
335
+    paddw        m0, m6
336
+    psraw        m0, 5
337
+    movh         m5, [r1+5]
338
+    punpcklbw    m5, m7
339
+    pshufw       m6, m5, 0xf9
340
+    paddw        m1, m5
341
+    paddw        m2, m6
342
+    pshufw       m6, m5, 0xbe
343
+    pshufw       m5, m5, 0x6f
344
+    paddw        m3, m6
345
+    paddw        m4, m5
346
+    paddw        m2, m2
347
+    psubw        m3, m2
348
+    pmullw       m1, [pw_20]
349
+    pmullw       m3, [pw_3]
350
+    psubw        m3, m4
351
+    paddw        m1, [PW_ROUND]
352
+    paddw        m3, m1
353
+    psraw        m3, 5
354
+    packuswb     m0, m3
355
+    OP_MOV     [r0], m0, m4
356
+    add          r1, r3
357
+    add          r0, r2
358
+    dec r4d
359
+    jne .loop
360
+    REP_RET
361
+%endmacro
362
+
363
+INIT_MMX mmxext
364
+%define PW_ROUND pw_16
365
+%define OP_MOV PUT_OP
366
+MPEG4_QPEL8_H_LOWPASS put
367
+%define PW_ROUND pw_16
368
+%define OP_MOV AVG_OP
369
+MPEG4_QPEL8_H_LOWPASS avg
370
+%define PW_ROUND pw_15
371
+%define OP_MOV PUT_OP
372
+MPEG4_QPEL8_H_LOWPASS put_no_rnd
373
+
374
+
375
+
376
+%macro QPEL_V_LOW 5
377
+    paddw      m0, m1
378
+    mova       m4, [pw_20]
379
+    pmullw     m4, m0
380
+    mova       m0, %4
381
+    mova       m5, %1
382
+    paddw      m5, m0
383
+    psubw      m4, m5
384
+    mova       m5, %2
385
+    mova       m6, %3
386
+    paddw      m5, m3
387
+    paddw      m6, m2
388
+    paddw      m6, m6
389
+    psubw      m5, m6
390
+    pmullw     m5, [pw_3]
391
+    paddw      m4, [PW_ROUND]
392
+    paddw      m5, m4
393
+    psraw      m5, 5
394
+    packuswb   m5, m5
395
+    OP_MOV     %5, m5, m7
396
+    SWAP 0,1,2,3
397
+%endmacro
398
+
399
+%macro MPEG4_QPEL16_V_LOWPASS 1
400
+cglobal %1_mpeg4_qpel16_v_lowpass, 4, 6, 0, 544
401
+    movsxdifnidn r2, r2d
402
+    movsxdifnidn r3, r3d
403
+
404
+    mov         r4d, 17
405
+    mov          r5, rsp
406
+    pxor         m7, m7
407
+.looph:
408
+    mova         m0, [r1]
409
+    mova         m1, [r1]
410
+    mova         m2, [r1+8]
411
+    mova         m3, [r1+8]
412
+    punpcklbw    m0, m7
413
+    punpckhbw    m1, m7
414
+    punpcklbw    m2, m7
415
+    punpckhbw    m3, m7
416
+    mova       [r5], m0
417
+    mova  [r5+0x88], m1
418
+    mova [r5+0x110], m2
419
+    mova [r5+0x198], m3
420
+    add          r5, 8
421
+    add          r1, r3
422
+    dec r4d
423
+    jne .looph
424
+
425
+
426
+    ; NOTE: r1 CHANGES VALUES: r1 -> 4 - 14*dstStride
427
+    mov         r4d, 4
428
+    mov          r1, 4
429
+    neg          r2
430
+    lea          r1, [r1+r2*8]
431
+    lea          r1, [r1+r2*4]
432
+    lea          r1, [r1+r2*2]
433
+    neg          r2
434
+    mov          r5, rsp
435
+.loopv:
436
+    pxor         m7, m7
437
+    mova         m0, [r5+ 0x0]
438
+    mova         m1, [r5+ 0x8]
439
+    mova         m2, [r5+0x10]
440
+    mova         m3, [r5+0x18]
441
+    QPEL_V_LOW [r5+0x10], [r5+ 0x8], [r5+ 0x0], [r5+0x20], [r0]
442
+    QPEL_V_LOW [r5+ 0x8], [r5+ 0x0], [r5+ 0x0], [r5+0x28], [r0+r2]
443
+    lea    r0, [r0+r2*2]
444
+    QPEL_V_LOW [r5+ 0x0], [r5+ 0x0], [r5+ 0x8], [r5+0x30], [r0]
445
+    QPEL_V_LOW [r5+ 0x0], [r5+ 0x8], [r5+0x10], [r5+0x38], [r0+r2]
446
+    lea    r0, [r0+r2*2]
447
+    QPEL_V_LOW [r5+ 0x8], [r5+0x10], [r5+0x18], [r5+0x40], [r0]
448
+    QPEL_V_LOW [r5+0x10], [r5+0x18], [r5+0x20], [r5+0x48], [r0+r2]
449
+    lea    r0, [r0+r2*2]
450
+    QPEL_V_LOW [r5+0x18], [r5+0x20], [r5+0x28], [r5+0x50], [r0]
451
+    QPEL_V_LOW [r5+0x20], [r5+0x28], [r5+0x30], [r5+0x58], [r0+r2]
452
+    lea    r0, [r0+r2*2]
453
+    QPEL_V_LOW [r5+0x28], [r5+0x30], [r5+0x38], [r5+0x60], [r0]
454
+    QPEL_V_LOW [r5+0x30], [r5+0x38], [r5+0x40], [r5+0x68], [r0+r2]
455
+    lea    r0, [r0+r2*2]
456
+    QPEL_V_LOW [r5+0x38], [r5+0x40], [r5+0x48], [r5+0x70], [r0]
457
+    QPEL_V_LOW [r5+0x40], [r5+0x48], [r5+0x50], [r5+0x78], [r0+r2]
458
+    lea    r0, [r0+r2*2]
459
+    QPEL_V_LOW [r5+0x48], [r5+0x50], [r5+0x58], [r5+0x80], [r0]
460
+    QPEL_V_LOW [r5+0x50], [r5+0x58], [r5+0x60], [r5+0x80], [r0+r2]
461
+    lea    r0, [r0+r2*2]
462
+    QPEL_V_LOW [r5+0x58], [r5+0x60], [r5+0x68], [r5+0x78], [r0]
463
+    QPEL_V_LOW [r5+0x60], [r5+0x68], [r5+0x70], [r5+0x70], [r0+r2]
464
+
465
+    add    r5, 0x88
466
+    add    r0, r1
467
+    dec r4d
468
+    jne .loopv
469
+    REP_RET
470
+%endmacro
471
+
472
+%macro PUT_OPH 2-3
473
+    movh %1, %2
474
+%endmacro
475
+
476
+%macro AVG_OPH 2-3
477
+    movh  %3, %1
478
+    pavgb %2, %3
479
+    movh  %1, %2
480
+%endmacro
481
+
482
+INIT_MMX mmxext
483
+%define PW_ROUND pw_16
484
+%define OP_MOV PUT_OPH
485
+MPEG4_QPEL16_V_LOWPASS put
486
+%define PW_ROUND pw_16
487
+%define OP_MOV AVG_OPH
488
+MPEG4_QPEL16_V_LOWPASS avg
489
+%define PW_ROUND pw_15
490
+%define OP_MOV PUT_OPH
491
+MPEG4_QPEL16_V_LOWPASS put_no_rnd
492
+
493
+
494
+
495
+%macro MPEG4_QPEL8_V_LOWPASS 1
496
+cglobal %1_mpeg4_qpel8_v_lowpass, 4, 6, 0, 288
497
+    movsxdifnidn r2, r2d
498
+    movsxdifnidn r3, r3d
499
+
500
+    mov         r4d, 9
501
+    mov          r5, rsp
502
+    pxor         m7, m7
503
+.looph:
504
+    mova         m0, [r1]
505
+    mova         m1, [r1]
506
+    punpcklbw    m0, m7
507
+    punpckhbw    m1, m7
508
+    mova       [r5], m0
509
+    mova  [r5+0x48], m1
510
+    add          r5, 8
511
+    add          r1, r3
512
+    dec r4d
513
+    jne .looph
514
+
515
+
516
+    ; NOTE: r1 CHANGES VALUES: r1 -> 4 - 6*dstStride
517
+    mov         r4d, 2
518
+    mov          r1, 4
519
+    neg          r2
520
+    lea          r1, [r1+r2*4]
521
+    lea          r1, [r1+r2*2]
522
+    neg          r2
523
+    mov          r5, rsp
524
+.loopv:
525
+    pxor         m7, m7
526
+    mova         m0, [r5+ 0x0]
527
+    mova         m1, [r5+ 0x8]
528
+    mova         m2, [r5+0x10]
529
+    mova         m3, [r5+0x18]
530
+    QPEL_V_LOW [r5+0x10], [r5+ 0x8], [r5+ 0x0], [r5+0x20], [r0]
531
+    QPEL_V_LOW [r5+ 0x8], [r5+ 0x0], [r5+ 0x0], [r5+0x28], [r0+r2]
532
+    lea    r0, [r0+r2*2]
533
+    QPEL_V_LOW [r5+ 0x0], [r5+ 0x0], [r5+ 0x8], [r5+0x30], [r0]
534
+    QPEL_V_LOW [r5+ 0x0], [r5+ 0x8], [r5+0x10], [r5+0x38], [r0+r2]
535
+    lea    r0, [r0+r2*2]
536
+    QPEL_V_LOW [r5+ 0x8], [r5+0x10], [r5+0x18], [r5+0x40], [r0]
537
+    QPEL_V_LOW [r5+0x10], [r5+0x18], [r5+0x20], [r5+0x40], [r0+r2]
538
+    lea    r0, [r0+r2*2]
539
+    QPEL_V_LOW [r5+0x18], [r5+0x20], [r5+0x28], [r5+0x38], [r0]
540
+    QPEL_V_LOW [r5+0x20], [r5+0x28], [r5+0x30], [r5+0x30], [r0+r2]
541
+
542
+    add    r5, 0x48
543
+    add    r0, r1
544
+    dec r4d
545
+    jne .loopv
546
+    REP_RET
547
+%endmacro
548
+
549
+INIT_MMX mmxext
550
+%define PW_ROUND pw_16
551
+%define OP_MOV PUT_OPH
552
+MPEG4_QPEL8_V_LOWPASS put
553
+%define PW_ROUND pw_16
554
+%define OP_MOV AVG_OPH
555
+MPEG4_QPEL8_V_LOWPASS avg
556
+%define PW_ROUND pw_15
557
+%define OP_MOV PUT_OPH
558
+MPEG4_QPEL8_V_LOWPASS put_no_rnd
0 559
new file mode 100644
... ...
@@ -0,0 +1,501 @@
0
+/*
1
+ * quarterpel DSP functions
2
+ *
3
+ * This file is part of Libav.
4
+ *
5
+ * Libav is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * Libav is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with Libav; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#include <stddef.h>
21
+#include <stdint.h>
22
+
23
+#include "config.h"
24
+#include "libavutil/attributes.h"
25
+#include "libavutil/cpu.h"
26
+#include "libavutil/x86/cpu.h"
27
+#include "libavcodec/pixels.h"
28
+#include "libavcodec/qpeldsp.h"
29
+#include "fpel.h"
30
+
31
/*
 * Prototypes for the two-source ("l2") pixel routines, in put, put_no_rnd
 * and avg flavours for 8- and 16-pixel-wide blocks.  They are only declared
 * here; the definitions live outside this file.
 *
 * Each takes a destination, two sources, a destination stride, a stride for
 * src1 only, and a row count h.  There is no stride parameter for src2.
 * NOTE(review): the QPEL_OP callers in this file always pass as src2 a
 * temporary buffer filled with a stride of 8 (or 16), so src2 is presumably
 * read with an implicit stride equal to the block width -- confirm against
 * the implementation.
 */
+void ff_put_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
32
+                              int dstStride, int src1Stride, int h);
33
+void ff_put_no_rnd_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1,
34
+                                     uint8_t *src2, int dstStride,
35
+                                     int src1Stride, int h);
36
+void ff_avg_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
37
+                              int dstStride, int src1Stride, int h);
38
+void ff_put_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
39
+                               int dstStride, int src1Stride, int h);
40
+void ff_avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
41
+                               int dstStride, int src1Stride, int h);
42
+void ff_put_no_rnd_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
43
+                                      int dstStride, int src1Stride, int h);
44
/*
 * Prototypes for the MPEG-4 quarter-pel lowpass routines, in put, avg and
 * put_no_rnd flavours for 16- and 8-pixel-wide blocks.  They are only
 * declared here; the definitions live outside this file.
 *
 * The h_lowpass variants take an explicit row count h; the QPEL_OP wrappers
 * in this file call them with h = 8 or 9 (8-wide) and h = 16 or 17 (16-wide).
 */
+void ff_put_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
45
+                                          int dstStride, int srcStride, int h);
46
+void ff_avg_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
47
+                                          int dstStride, int srcStride, int h);
48
+void ff_put_no_rnd_mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
49
+                                                 int dstStride, int srcStride,
50
+                                                 int h);
51
+void ff_put_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
52
+                                         int dstStride, int srcStride, int h);
53
+void ff_avg_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
54
+                                         int dstStride, int srcStride, int h);
55
+void ff_put_no_rnd_mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, uint8_t *src,
56
+                                                int dstStride, int srcStride,
57
+                                                int h);
/*
 * The v_lowpass variants take no row count.
 * NOTE(review): the block height is presumably fixed by the block width
 * (with one extra source row read below the block, which is why the
 * wrappers produce 9 or 17 rows of horizontal output first) -- confirm
 * against the assembly.
 */
58
+void ff_put_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
59
+                                          int dstStride, int srcStride);
60
+void ff_avg_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
61
+                                          int dstStride, int srcStride);
62
+void ff_put_no_rnd_mpeg4_qpel16_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
63
+                                                 int dstStride, int srcStride);
64
+void ff_put_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
65
+                                         int dstStride, int srcStride);
66
+void ff_avg_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
67
+                                         int dstStride, int srcStride);
68
+void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, uint8_t *src,
69
+                                                int dstStride, int srcStride);
70
/*
 * Alias the "no_rnd" full-pel names to the regular put functions.
 * The mc00 wrappers generated by QPEL_OP paste the name together as
 * ff_ ## OPNAME ## pixels{8,16}_ ## MMX, so these aliases are what make that
 * name resolve when OPNAME is put_no_rnd_ (instantiation not visible in this
 * part of the file).
 * NOTE(review): presumably safe because a plain copy involves no rounding --
 * confirm.
 */
+#define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmxext
71
+#define ff_put_no_rnd_pixels8_mmxext ff_put_pixels8_mmxext
72
+
73
+#if HAVE_YASM
74
+
75
/*
 * Instantiate CALL_2X_PIXELS for the 16-wide avg and put names, each paired
 * with its 8-wide counterpart and the constant 8.
 * NOTE(review): CALL_2X_PIXELS is not defined in this file; presumably
 * (from libavcodec/pixels.h) it defines the first function as two calls to
 * the second, offset by the third argument -- confirm against the header.
 */
+CALL_2X_PIXELS(ff_avg_pixels16_mmxext, ff_avg_pixels8_mmxext, 8)
76
+CALL_2X_PIXELS(ff_put_pixels16_mmxext, ff_put_pixels8_mmxext, 8)
77
+
78
+#define QPEL_OP(OPNAME, RND, MMX)                                       \
79
+static void OPNAME ## qpel8_mc00_ ## MMX(uint8_t *dst, uint8_t *src,    \
80
+                                         ptrdiff_t stride)              \
81
+{                                                                       \
82
+    ff_ ## OPNAME ## pixels8_ ## MMX(dst, src, stride, 8);              \
83
+}                                                                       \
84
+                                                                        \
85
+static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src,    \
86
+                                         ptrdiff_t stride)              \
87
+{                                                                       \
88
+    uint64_t temp[8];                                                   \
89
+    uint8_t *const half = (uint8_t *) temp;                             \
90
+    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8,        \
91
+                                                   stride, 8);          \
92
+    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half,                 \
93
+                                        stride, stride, 8);             \
94
+}                                                                       \
95
+                                                                        \
96
+static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, uint8_t *src,    \
97
+                                         ptrdiff_t stride)              \
98
+{                                                                       \
99
+    ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride,    \
100
+                                                   stride, 8);          \
101
+}                                                                       \
102
+                                                                        \
103
+static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src,    \
104
+                                         ptrdiff_t stride)              \
105
+{                                                                       \
106
+    uint64_t temp[8];                                                   \
107
+    uint8_t *const half = (uint8_t *) temp;                             \
108
+    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8,        \
109
+                                                   stride, 8);          \
110
+    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + 1, half, stride,     \
111
+                                        stride, 8);                     \
112
+}                                                                       \
113
+                                                                        \
114
+static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src,    \
115
+                                         ptrdiff_t stride)              \
116
+{                                                                       \
117
+    uint64_t temp[8];                                                   \
118
+    uint8_t *const half = (uint8_t *) temp;                             \
119
+    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src,           \
120
+                                                   8, stride);          \
121
+    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half,                 \
122
+                                        stride, stride, 8);             \
123
+}                                                                       \
124
+                                                                        \
125
+static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, uint8_t *src,    \
126
+                                         ptrdiff_t stride)              \
127
+{                                                                       \
128
+    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src,            \
129
+                                                   stride, stride);     \
130
+}                                                                       \
131
+                                                                        \
132
+static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src,    \
133
+                                         ptrdiff_t stride)              \
134
+{                                                                       \
135
+    uint64_t temp[8];                                                   \
136
+    uint8_t *const half = (uint8_t *) temp;                             \
137
+    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src,           \
138
+                                                   8, stride);          \
139
+    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + stride, half, stride,\
140
+                                        stride, 8);                     \
141
+}                                                                       \
142
+                                                                        \
143
+static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src,    \
144
+                                         ptrdiff_t stride)              \
145
+{                                                                       \
146
+    uint64_t half[8 + 9];                                               \
147
+    uint8_t *const halfH  = (uint8_t *) half + 64;                      \
148
+    uint8_t *const halfHV = (uint8_t *) half;                           \
149
+    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
150
+                                                   stride, 9);          \
151
+    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8,           \
152
+                                        stride, 9);                     \
153
+    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
154
+    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV,             \
155
+                                        stride, 8, 8);                  \
156
+}                                                                       \
157
+                                                                        \
158
+static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src,    \
159
+                                         ptrdiff_t stride)              \
160
+{                                                                       \
161
+    uint64_t half[8 + 9];                                               \
162
+    uint8_t *const halfH  = (uint8_t *) half + 64;                      \
163
+    uint8_t *const halfHV = (uint8_t *) half;                           \
164
+    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
165
+                                                   stride, 9);          \
166
+    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8,       \
167
+                                        stride, 9);                     \
168
+    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
169
+    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV,             \
170
+                                        stride, 8, 8);                  \
171
+}                                                                       \
172
+                                                                        \
173
+static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src,    \
174
+                                         ptrdiff_t stride)              \
175
+{                                                                       \
176
+    uint64_t half[8 + 9];                                               \
177
+    uint8_t *const halfH  = (uint8_t *) half + 64;                      \
178
+    uint8_t *const halfHV = (uint8_t *) half;                           \
179
+    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
180
+                                                   stride, 9);          \
181
+    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8,           \
182
+                                        stride, 9);                     \
183
+    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
184
+    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV,         \
185
+                                        stride, 8, 8);                  \
186
+}                                                                       \
187
+                                                                        \
188
+static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src,    \
189
+                                         ptrdiff_t stride)              \
190
+{                                                                       \
191
+    uint64_t half[8 + 9];                                               \
192
+    uint8_t *const halfH  = (uint8_t *) half + 64;                      \
193
+    uint8_t *const halfHV = (uint8_t *) half;                           \
194
+    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
195
+                                                   stride, 9);          \
196
+    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8,       \
197
+                                        stride, 9);                     \
198
+    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
199
+    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV,         \
200
+                                        stride, 8, 8);                  \
201
+}                                                                       \
202
+                                                                        \
203
+static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src,    \
204
+                                         ptrdiff_t stride)              \
205
+{                                                                       \
206
+    uint64_t half[8 + 9];                                               \
207
+    uint8_t *const halfH  = (uint8_t *) half + 64;                      \
208
+    uint8_t *const halfHV = (uint8_t *) half;                           \
209
+    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
210
+                                                   stride, 9);          \
211
+    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
212
+    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV,             \
213
+                                        stride, 8, 8);                  \
214
+}                                                                       \
215
+                                                                        \
216
+static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src,    \
217
+                                         ptrdiff_t stride)              \
218
+{                                                                       \
219
+    uint64_t half[8 + 9];                                               \
220
+    uint8_t *const halfH  = (uint8_t *) half + 64;                      \
221
+    uint8_t *const halfHV = (uint8_t *) half;                           \
222
+    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
223
+                                                   stride, 9);          \
224
+    ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
225
+    ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV,         \
226
+                                        stride, 8, 8);                  \
227
+}                                                                       \
228
+                                                                        \
229
+static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src,    \
230
+                                         ptrdiff_t stride)              \
231
+{                                                                       \
232
+    uint64_t half[8 + 9];                                               \
233
+    uint8_t *const halfH = (uint8_t *) half;                            \
234
+    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
235
+                                                   stride, 9);          \
236
+    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH,              \
237
+                                        8, stride, 9);                  \
238
+    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH,          \
239
+                                                   stride, 8);          \
240
+}                                                                       \
241
+                                                                        \
242
+static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src,    \
243
+                                         ptrdiff_t stride)              \
244
+{                                                                       \
245
+    uint64_t half[8 + 9];                                               \
246
+    uint8_t *const halfH = (uint8_t *) half;                            \
247
+    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
248
+                                                   stride, 9);          \
249
+    ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8,       \
250
+                                        stride, 9);                     \
251
+    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH,          \
252
+                                                   stride, 8);          \
253
+}                                                                       \
254
+                                                                        \
255
+static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src,    \
256
+                                         ptrdiff_t stride)              \
257
+{                                                                       \
258
+    uint64_t half[9];                                                   \
259
+    uint8_t *const halfH = (uint8_t *) half;                            \
260
+    ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8,       \
261
+                                                   stride, 9);          \
262
+    ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH,          \
263
+                                                   stride, 8);          \
264
+}                                                                       \
265
+                                                                        \
266
+static void OPNAME ## qpel16_mc00_ ## MMX(uint8_t *dst, uint8_t *src,   \
267
+                                          ptrdiff_t stride)             \
268
+{                                                                       \
269
+    ff_ ## OPNAME ## pixels16_ ## MMX(dst, src, stride, 16);            \
270
+}                                                                       \
271
+                                                                        \
272
+static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src,   \
273
+                                          ptrdiff_t stride)             \
274
+{                                                                       \
275
+    uint64_t temp[32];                                                  \
276
+    uint8_t *const half = (uint8_t *) temp;                             \
277
+    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16,      \
278
+                                                    stride, 16);        \
279
+    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride,        \
280
+                                         stride, 16);                   \
281
+}                                                                       \
282
+                                                                        \
283
+static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src,   \
284
+                                          ptrdiff_t stride)             \
285
+{                                                                       \
286
+    ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src,           \
287
+                                                    stride, stride, 16);\
288
+}                                                                       \
289
+                                                                        \
290
+static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src,   \
291
+                                          ptrdiff_t stride)             \
292
+{                                                                       \
293
+    uint64_t temp[32];                                                  \
294
+    uint8_t *const half = (uint8_t*) temp;                              \
295
+    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16,      \
296
+                                                    stride, 16);        \
297
+    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + 1, half,            \
298
+                                         stride, stride, 16);           \
299
+}                                                                       \
300
+                                                                        \
301
+static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src,   \
302
+                                          ptrdiff_t stride)             \
303
+{                                                                       \
304
+    uint64_t temp[32];                                                  \
305
+    uint8_t *const half = (uint8_t *) temp;                             \
306
+    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16,      \
307
+                                                    stride);            \
308
+    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride,        \
309
+                                         stride, 16);                   \
310
+}                                                                       \
311
+                                                                        \
312
+static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src,   \
313
+                                          ptrdiff_t stride)             \
314
+{                                                                       \
315
+    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src,           \
316
+                                                    stride, stride);    \
317
+}                                                                       \
318
+                                                                        \
319
+static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src,   \
320
+                                          ptrdiff_t stride)             \
321
+{                                                                       \
322
+    uint64_t temp[32];                                                  \
323
+    uint8_t *const half = (uint8_t *) temp;                             \
324
+    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16,      \
325
+                                                    stride);            \
326
+    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half,         \
327
+                                         stride, stride, 16);           \
328
+}                                                                       \
329
+                                                                        \
330
+static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src,   \
331
+                                          ptrdiff_t stride)             \
332
+{                                                                       \
333
+    uint64_t half[16 * 2 + 17 * 2];                                     \
334
+    uint8_t *const halfH  = (uint8_t *) half + 256;                     \
335
+    uint8_t *const halfHV = (uint8_t *) half;                           \
336
+    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
337
+                                                    stride, 17);        \
338
+    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16,         \
339
+                                         stride, 17);                   \
340
+    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
341
+                                                    16, 16);            \
342
+    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV,            \
343
+                                         stride, 16, 16);               \
344
+}                                                                       \
345
+                                                                        \
346
+static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src,   \
347
+                                          ptrdiff_t stride)             \
348
+{                                                                       \
349
+    uint64_t half[16 * 2 + 17 * 2];                                     \
350
+    uint8_t *const halfH  = (uint8_t *) half + 256;                     \
351
+    uint8_t *const halfHV = (uint8_t *) half;                           \
352
+    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
353
+                                                    stride, 17);        \
354
+    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16,     \
355
+                                         stride, 17);                   \
356
+    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
357
+                                                    16, 16);            \
358
+    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV,            \
359
+                                         stride, 16, 16);               \
360
+}                                                                       \
361
+                                                                        \
362
+static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src,   \
363
+                                          ptrdiff_t stride)             \
364
+{                                                                       \
365
+    uint64_t half[16 * 2 + 17 * 2];                                     \
366
+    uint8_t *const halfH  = (uint8_t *) half + 256;                     \
367
+    uint8_t *const halfHV = (uint8_t *) half;                           \
368
+    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
369
+                                                    stride, 17);        \
370
+    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16,         \
371
+                                         stride, 17);                   \
372
+    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
373
+                                                    16, 16);            \
374
+    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV,       \
375
+                                         stride, 16, 16);               \
376
+}                                                                       \
377
+                                                                        \
378
+static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src,   \
379
+                                          ptrdiff_t stride)             \
380
+{                                                                       \
381
+    uint64_t half[16 * 2 + 17 * 2];                                     \
382
+    uint8_t *const halfH  = (uint8_t *) half + 256;                     \
383
+    uint8_t *const halfHV = (uint8_t *) half;                           \
384
+    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
385
+                                                    stride, 17);        \
386
+    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16,     \
387
+                                         stride, 17);                   \
388
+    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
389
+                                                    16, 16);            \
390
+    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV,       \
391
+                                         stride, 16, 16);               \
392
+}                                                                       \
393
+                                                                        \
394
+static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src,   \
395
+                                          ptrdiff_t stride)             \
396
+{                                                                       \
397
+    uint64_t half[16 * 2 + 17 * 2];                                     \
398
+    uint8_t *const halfH  = (uint8_t *) half + 256;                     \
399
+    uint8_t *const halfHV = (uint8_t *) half;                           \
400
+    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
401
+                                                    stride, 17);        \
402
+    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
403
+                                                    16, 16);            \
404
+    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV,            \
405
+                                         stride, 16, 16);               \
406
+}                                                                       \
407
+                                                                        \
408
+static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src,   \
409
+                                          ptrdiff_t stride)             \
410
+{                                                                       \
411
+    uint64_t half[16 * 2 + 17 * 2];                                     \
412
+    uint8_t *const halfH  = (uint8_t *) half + 256;                     \
413
+    uint8_t *const halfHV = (uint8_t *) half;                           \
414
+    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
415
+                                                    stride, 17);        \
416
+    ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH,      \
417
+                                                    16, 16);            \
418
+    ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV,       \
419
+                                         stride, 16, 16);               \
420
+}                                                                       \
421
+                                                                        \
422
+static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src,   \
423
+                                          ptrdiff_t stride)             \
424
+{                                                                       \
425
+    uint64_t half[17 * 2];                                              \
426
+    uint8_t *const halfH = (uint8_t *) half;                            \
427
+    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
428
+                                                    stride, 17);        \
429
+    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16,         \
430
+                                         stride, 17);                   \
431
+    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH,         \
432
+                                                    stride, 16);        \
433
+}                                                                       \
434
+                                                                        \
435
+static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src,   \
436
+                                          ptrdiff_t stride)             \
437
+{                                                                       \
438
+    uint64_t half[17 * 2];                                              \
439
+    uint8_t *const halfH = (uint8_t *) half;                            \
440
+    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
441
+                                                    stride, 17);        \
442
+    ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16,     \
443
+                                         stride, 17);                   \
444
+    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH,         \
445
+                                                    stride, 16);        \
446
+}                                                                       \
447
+                                                                        \
448
+static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src,   \
449
+                                          ptrdiff_t stride)             \
450
+{                                                                       \
451
+    uint64_t half[17 * 2];                                              \
452
+    uint8_t *const halfH = (uint8_t *) half;                            \
453
+    ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16,     \
454
+                                                    stride, 17);        \
455
+    ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH,         \
456
+                                                    stride, 16);        \
457
+}
458
+
459
/* Instantiate the QPEL_OP wrapper family three times for the mmxext suffix.
 * The first argument is pasted in front of the generated static function
 * names (e.g. <OPNAME>qpel16_mc22_mmxext) and into the final
 * ff_<OPNAME>... helper call; the second argument is pasted into the names
 * of the intermediate ff_put<RND>mpeg4_qpel..._lowpass_ / ff_put<RND>pixels...
 * helpers. The avg_ variant reuses the plain "_" helpers for its
 * intermediate passes. */
+QPEL_OP(put_,        _,        mmxext)
460
+QPEL_OP(avg_,        _,        mmxext)
461
+QPEL_OP(put_no_rnd_, _no_rnd_, mmxext)
462
+
463
+#endif /* HAVE_YASM */
464
+
465
/*
 * Fill one 16-entry row of a quarter-pel function table.
 *
 * Expects a pointer named `c` in the expansion scope that has a member
 * called <FAMILY>_pixels_tab. For every quarter-pel position (x, y) with
 * x, y in 0..3, slot x + 4 * y of row ROW receives the symbol
 *
 *     <NS><FAMILY><BLK>_mc<x><y>_<ISA>
 *
 * FAMILY - table/function family, e.g. put_qpel
 * ROW    - first index into the table
 * BLK    - block-size token pasted into the function name, e.g. 16
 * ISA    - instruction-set suffix of the function name, e.g. mmxext
 * NS     - optional name prefix; may be left empty
 *
 * All name parts are joined with ## directly in this macro, so the
 * arguments are pasted as written and never macro-expanded first.
 */
#define SET_QPEL_FUNCS(FAMILY, ROW, BLK, ISA, NS)                             \
do {                                                                          \
    /* y = 0 */                                                               \
    c->FAMILY ## _pixels_tab[ROW][ 0] = NS ## FAMILY ## BLK ## _mc00_ ## ISA; \
    c->FAMILY ## _pixels_tab[ROW][ 1] = NS ## FAMILY ## BLK ## _mc10_ ## ISA; \
    c->FAMILY ## _pixels_tab[ROW][ 2] = NS ## FAMILY ## BLK ## _mc20_ ## ISA; \
    c->FAMILY ## _pixels_tab[ROW][ 3] = NS ## FAMILY ## BLK ## _mc30_ ## ISA; \
    /* y = 1 */                                                               \
    c->FAMILY ## _pixels_tab[ROW][ 4] = NS ## FAMILY ## BLK ## _mc01_ ## ISA; \
    c->FAMILY ## _pixels_tab[ROW][ 5] = NS ## FAMILY ## BLK ## _mc11_ ## ISA; \
    c->FAMILY ## _pixels_tab[ROW][ 6] = NS ## FAMILY ## BLK ## _mc21_ ## ISA; \
    c->FAMILY ## _pixels_tab[ROW][ 7] = NS ## FAMILY ## BLK ## _mc31_ ## ISA; \
    /* y = 2 */                                                               \
    c->FAMILY ## _pixels_tab[ROW][ 8] = NS ## FAMILY ## BLK ## _mc02_ ## ISA; \
    c->FAMILY ## _pixels_tab[ROW][ 9] = NS ## FAMILY ## BLK ## _mc12_ ## ISA; \
    c->FAMILY ## _pixels_tab[ROW][10] = NS ## FAMILY ## BLK ## _mc22_ ## ISA; \
    c->FAMILY ## _pixels_tab[ROW][11] = NS ## FAMILY ## BLK ## _mc32_ ## ISA; \
    /* y = 3 */                                                               \
    c->FAMILY ## _pixels_tab[ROW][12] = NS ## FAMILY ## BLK ## _mc03_ ## ISA; \
    c->FAMILY ## _pixels_tab[ROW][13] = NS ## FAMILY ## BLK ## _mc13_ ## ISA; \
    c->FAMILY ## _pixels_tab[ROW][14] = NS ## FAMILY ## BLK ## _mc23_ ## ISA; \
    c->FAMILY ## _pixels_tab[ROW][15] = NS ## FAMILY ## BLK ## _mc33_ ## ISA; \
} while (0)
484
+
485
+av_cold void ff_qpeldsp_init_x86(QpelDSPContext *c)
486
+{
487
+    int cpu_flags = av_get_cpu_flags();
488
+
489
+    if (X86_MMXEXT(cpu_flags)) {
490
+#if HAVE_MMXEXT_EXTERNAL
491
+        SET_QPEL_FUNCS(avg_qpel,        0, 16, mmxext, );
492
+        SET_QPEL_FUNCS(avg_qpel,        1,  8, mmxext, );
493
+
494
+        SET_QPEL_FUNCS(put_qpel,        0, 16, mmxext, );
495
+        SET_QPEL_FUNCS(put_qpel,        1,  8, mmxext, );
496
+        SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, );
497
+        SET_QPEL_FUNCS(put_no_rnd_qpel, 1,  8, mmxext, );
498
+#endif /* HAVE_MMXEXT_EXTERNAL */
499
+    }
500
+}