Browse code

Merge commit '57f09608e1600d1cf1679885a46f5004d522d68f'

* commit '57f09608e1600d1cf1679885a46f5004d522d68f':
dsputil: Move thirdpel-related bits into their own context

Conflicts:
libavcodec/svq3.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>

Michael Niedermayer authored on 2014/03/23 07:26:23
Showing 12 changed files
... ...
@@ -1780,6 +1780,7 @@ CONFIG_EXTRA="
1780 1780
     rtpdec
1781 1781
     rtpenc_chain
1782 1782
     sinewin
1783
+    tpeldsp
1783 1784
     videodsp
1784 1785
     vp3dsp
1785 1786
 "
... ...
@@ -2090,7 +2091,7 @@ sonic_ls_encoder_select="golomb"
2090 2090
 sp5x_decoder_select="mjpeg_decoder"
2091 2091
 svq1_decoder_select="hpeldsp"
2092 2092
 svq1_encoder_select="aandcttables dsputil hpeldsp mpegvideoenc"
2093
-svq3_decoder_select="h264_decoder hpeldsp"
2093
+svq3_decoder_select="h264_decoder hpeldsp tpeldsp"
2094 2094
 svq3_decoder_suggest="zlib"
2095 2095
 tak_decoder_select="dsputil"
2096 2096
 theora_decoder_select="vp3_decoder"
... ...
@@ -79,9 +79,6 @@ qpel{8,16}_mc??_old_c / *pixels{8,16}_l4
79 79
     Just used to work around a bug in an old libavcodec encoder version.
80 80
     Don't optimize them.
81 81
 
82
-tpel_mc_func {put,avg}_tpel_pixels_tab
83
-    Used only for SVQ3, so only optimize them if you need fast SVQ3 decoding.
84
-
85 82
 add_bytes/diff_bytes
86 83
     For huffyuv only, optimize if you want a faster ffhuffyuv codec.
87 84
 
... ...
@@ -76,6 +76,7 @@ RDFT-OBJS-$(CONFIG_HARDCODED_TABLES)   += sin_tables.o
76 76
 OBJS-$(CONFIG_RDFT)                    += rdft.o $(RDFT-OBJS-yes)
77 77
 OBJS-$(CONFIG_SHARED)                  += log2_tab.o
78 78
 OBJS-$(CONFIG_SINEWIN)                 += sinewin.o
79
+OBJS-$(CONFIG_TPELDSP)                 += tpeldsp.o
79 80
 OBJS-$(CONFIG_VAAPI)                   += vaapi.o
80 81
 OBJS-$(CONFIG_VDPAU)                   += vdpau.o
81 82
 OBJS-$(CONFIG_VIDEODSP)                += videodsp.o
... ...
@@ -50,6 +50,7 @@ uint32_t ff_square_tab[512] = { 0, };
50 50
 #undef BIT_DEPTH
51 51
 
52 52
 #define BIT_DEPTH 8
53
+#include "tpel_template.c"
53 54
 #include "dsputil_template.c"
54 55
 
55 56
 // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
... ...
@@ -604,284 +605,6 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
604 604
     }
605 605
 }
606 606
 
607
-static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
608
-                                          int stride, int width, int height)
609
-{
610
-    switch (width) {
611
-    case 2:
612
-        put_pixels2_8_c(dst, src, stride, height);
613
-        break;
614
-    case 4:
615
-        put_pixels4_8_c(dst, src, stride, height);
616
-        break;
617
-    case 8:
618
-        put_pixels8_8_c(dst, src, stride, height);
619
-        break;
620
-    case 16:
621
-        put_pixels16_8_c(dst, src, stride, height);
622
-        break;
623
-    }
624
-}
625
-
626
-static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
627
-                                          int stride, int width, int height)
628
-{
629
-    int i, j;
630
-
631
-    for (i = 0; i < height; i++) {
632
-        for (j = 0; j < width; j++)
633
-            dst[j] = ((2 * src[j] + src[j + 1] + 1) *
634
-                      683) >> 11;
635
-        src += stride;
636
-        dst += stride;
637
-    }
638
-}
639
-
640
-static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
641
-                                          int stride, int width, int height)
642
-{
643
-    int i, j;
644
-
645
-    for (i = 0; i < height; i++) {
646
-        for (j = 0; j < width; j++)
647
-            dst[j] = ((src[j] + 2 * src[j + 1] + 1) *
648
-                      683) >> 11;
649
-        src += stride;
650
-        dst += stride;
651
-    }
652
-}
653
-
654
-static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
655
-                                          int stride, int width, int height)
656
-{
657
-    int i, j;
658
-
659
-    for (i = 0; i < height; i++) {
660
-        for (j = 0; j < width; j++)
661
-            dst[j] = ((2 * src[j] + src[j + stride] + 1) *
662
-                      683) >> 11;
663
-        src += stride;
664
-        dst += stride;
665
-    }
666
-}
667
-
668
-static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
669
-                                          int stride, int width, int height)
670
-{
671
-    int i, j;
672
-
673
-    for (i = 0; i < height; i++) {
674
-        for (j = 0; j < width; j++)
675
-            dst[j] = ((4 * src[j]          + 3 * src[j + 1] +
676
-                       3 * src[j + stride] + 2 * src[j + stride + 1] + 6) *
677
-                      2731) >> 15;
678
-        src += stride;
679
-        dst += stride;
680
-    }
681
-}
682
-
683
-static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
684
-                                          int stride, int width, int height)
685
-{
686
-    int i, j;
687
-
688
-    for (i = 0; i < height; i++) {
689
-        for (j = 0; j < width; j++)
690
-            dst[j] = ((3 * src[j]          + 2 * src[j + 1] +
691
-                       4 * src[j + stride] + 3 * src[j + stride + 1] + 6) *
692
-                      2731) >> 15;
693
-        src += stride;
694
-        dst += stride;
695
-    }
696
-}
697
-
698
-static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
699
-                                          int stride, int width, int height)
700
-{
701
-    int i, j;
702
-
703
-    for (i = 0; i < height; i++) {
704
-        for (j = 0; j < width; j++)
705
-            dst[j] = ((src[j] + 2 * src[j + stride] + 1) *
706
-                      683) >> 11;
707
-        src += stride;
708
-        dst += stride;
709
-    }
710
-}
711
-
712
-static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
713
-                                          int stride, int width, int height)
714
-{
715
-    int i, j;
716
-
717
-    for (i = 0; i < height; i++) {
718
-        for (j = 0; j < width; j++)
719
-            dst[j] = ((3 * src[j]          + 4 * src[j + 1] +
720
-                       2 * src[j + stride] + 3 * src[j + stride + 1] + 6) *
721
-                      2731) >> 15;
722
-        src += stride;
723
-        dst += stride;
724
-    }
725
-}
726
-
727
-static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
728
-                                          int stride, int width, int height)
729
-{
730
-    int i, j;
731
-
732
-    for (i = 0; i < height; i++) {
733
-        for (j = 0; j < width; j++)
734
-            dst[j] = ((2 * src[j]          + 3 * src[j + 1] +
735
-                       3 * src[j + stride] + 4 * src[j + stride + 1] + 6) *
736
-                      2731) >> 15;
737
-        src += stride;
738
-        dst += stride;
739
-    }
740
-}
741
-
742
-static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
743
-                                          int stride, int width, int height)
744
-{
745
-    switch (width) {
746
-    case 2:
747
-        avg_pixels2_8_c(dst, src, stride, height);
748
-        break;
749
-    case 4:
750
-        avg_pixels4_8_c(dst, src, stride, height);
751
-        break;
752
-    case 8:
753
-        avg_pixels8_8_c(dst, src, stride, height);
754
-        break;
755
-    case 16:
756
-        avg_pixels16_8_c(dst, src, stride, height);
757
-        break;
758
-    }
759
-}
760
-
761
-static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
762
-                                          int stride, int width, int height)
763
-{
764
-    int i, j;
765
-
766
-    for (i = 0; i < height; i++) {
767
-        for (j = 0; j < width; j++)
768
-            dst[j] = (dst[j] +
769
-                      (((2 * src[j] + src[j + 1] + 1) *
770
-                        683) >> 11) + 1) >> 1;
771
-        src += stride;
772
-        dst += stride;
773
-    }
774
-}
775
-
776
-static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
777
-                                          int stride, int width, int height)
778
-{
779
-    int i, j;
780
-
781
-    for (i = 0; i < height; i++) {
782
-        for (j = 0; j < width; j++)
783
-            dst[j] = (dst[j] +
784
-                      (((src[j] + 2 * src[j + 1] + 1) *
785
-                        683) >> 11) + 1) >> 1;
786
-        src += stride;
787
-        dst += stride;
788
-    }
789
-}
790
-
791
-static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
792
-                                          int stride, int width, int height)
793
-{
794
-    int i, j;
795
-
796
-    for (i = 0; i < height; i++) {
797
-        for (j = 0; j < width; j++)
798
-            dst[j] = (dst[j] +
799
-                      (((2 * src[j] + src[j + stride] + 1) *
800
-                        683) >> 11) + 1) >> 1;
801
-        src += stride;
802
-        dst += stride;
803
-    }
804
-}
805
-
806
-static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
807
-                                          int stride, int width, int height)
808
-{
809
-    int i, j;
810
-
811
-    for (i = 0; i < height; i++) {
812
-        for (j = 0; j < width; j++)
813
-            dst[j] = (dst[j] +
814
-                      (((4 * src[j]          + 3 * src[j + 1] +
815
-                         3 * src[j + stride] + 2 * src[j + stride + 1] + 6) *
816
-                        2731) >> 15) + 1) >> 1;
817
-        src += stride;
818
-        dst += stride;
819
-    }
820
-}
821
-
822
-static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
823
-                                          int stride, int width, int height)
824
-{
825
-    int i, j;
826
-
827
-    for (i = 0; i < height; i++) {
828
-        for (j = 0; j < width; j++)
829
-            dst[j] = (dst[j] +
830
-                      (((3 * src[j]          + 2 * src[j + 1] +
831
-                         4 * src[j + stride] + 3 * src[j + stride + 1] + 6) *
832
-                        2731) >> 15) + 1) >> 1;
833
-        src += stride;
834
-        dst += stride;
835
-    }
836
-}
837
-
838
-static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
839
-                                          int stride, int width, int height)
840
-{
841
-    int i, j;
842
-
843
-    for (i = 0; i < height; i++) {
844
-        for (j = 0; j < width; j++)
845
-            dst[j] = (dst[j] +
846
-                      (((src[j] + 2 * src[j + stride] + 1) *
847
-                        683) >> 11) + 1) >> 1;
848
-        src += stride;
849
-        dst += stride;
850
-    }
851
-}
852
-
853
-static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
854
-                                          int stride, int width, int height)
855
-{
856
-    int i, j;
857
-
858
-    for (i = 0; i < height; i++) {
859
-        for (j = 0; j < width; j++)
860
-            dst[j] = (dst[j] +
861
-                      (((3 * src[j]          + 4 * src[j + 1] +
862
-                         2 * src[j + stride] + 3 * src[j + stride + 1] + 6) *
863
-                        2731) >> 15) + 1) >> 1;
864
-        src += stride;
865
-        dst += stride;
866
-    }
867
-}
868
-
869
-static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
870
-                                          int stride, int width, int height)
871
-{
872
-    int i, j;
873
-
874
-    for (i = 0; i < height; i++) {
875
-        for (j = 0; j < width; j++)
876
-            dst[j] = (dst[j] +
877
-                      (((2 * src[j]          + 3 * src[j + 1] +
878
-                         3 * src[j + stride] + 4 * src[j + stride + 1] + 6) *
879
-                        2731) >> 15) + 1) >> 1;
880
-        src += stride;
881
-        dst += stride;
882
-    }
883
-}
884
-
885 607
 #define QPEL_MC(r, OPNAME, RND, OP)                                           \
886 608
 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src,       \
887 609
                                             int dstStride, int srcStride,     \
... ...
@@ -2974,26 +2697,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
2974 2974
     c->pix_abs[1][2] = pix_abs8_y2_c;
2975 2975
     c->pix_abs[1][3] = pix_abs8_xy2_c;
2976 2976
 
2977
-    c->put_tpel_pixels_tab[0]  = put_tpel_pixels_mc00_c;
2978
-    c->put_tpel_pixels_tab[1]  = put_tpel_pixels_mc10_c;
2979
-    c->put_tpel_pixels_tab[2]  = put_tpel_pixels_mc20_c;
2980
-    c->put_tpel_pixels_tab[4]  = put_tpel_pixels_mc01_c;
2981
-    c->put_tpel_pixels_tab[5]  = put_tpel_pixels_mc11_c;
2982
-    c->put_tpel_pixels_tab[6]  = put_tpel_pixels_mc21_c;
2983
-    c->put_tpel_pixels_tab[8]  = put_tpel_pixels_mc02_c;
2984
-    c->put_tpel_pixels_tab[9]  = put_tpel_pixels_mc12_c;
2985
-    c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
2986
-
2987
-    c->avg_tpel_pixels_tab[0]  = avg_tpel_pixels_mc00_c;
2988
-    c->avg_tpel_pixels_tab[1]  = avg_tpel_pixels_mc10_c;
2989
-    c->avg_tpel_pixels_tab[2]  = avg_tpel_pixels_mc20_c;
2990
-    c->avg_tpel_pixels_tab[4]  = avg_tpel_pixels_mc01_c;
2991
-    c->avg_tpel_pixels_tab[5]  = avg_tpel_pixels_mc11_c;
2992
-    c->avg_tpel_pixels_tab[6]  = avg_tpel_pixels_mc21_c;
2993
-    c->avg_tpel_pixels_tab[8]  = avg_tpel_pixels_mc02_c;
2994
-    c->avg_tpel_pixels_tab[9]  = avg_tpel_pixels_mc12_c;
2995
-    c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
2996
-
2997 2977
 #define dspfunc(PFX, IDX, NUM)                              \
2998 2978
     c->PFX ## _pixels_tab[IDX][0]  = PFX ## NUM ## _mc00_c; \
2999 2979
     c->PFX ## _pixels_tab[IDX][1]  = PFX ## NUM ## _mc10_c; \
... ...
@@ -71,9 +71,6 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
71 71
  * Block sizes for op_pixels_func are 8x4,8x8 16x8 16x16.
72 72
  * h for op_pixels_func is limited to { width / 2, width },
73 73
  * but never larger than 16 and never smaller than 4. */
74
-typedef void (*tpel_mc_func)(uint8_t *block /* align width (8 or 16) */,
75
-                             const uint8_t *pixels /* align 1 */,
76
-                             int line_size, int w, int h);
77 74
 typedef void (*qpel_mc_func)(uint8_t *dst /* align width (8 or 16) */,
78 75
                              uint8_t *src /* align 1 */, ptrdiff_t stride);
79 76
 
... ...
@@ -190,19 +187,6 @@ typedef struct DSPContext {
190 190
     int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2,
191 191
                              int size);
192 192
 
193
-    /**
194
-     * Thirdpel motion compensation with rounding (a + b + 1) >> 1.
195
-     * this is an array[12] of motion compensation functions for the
196
-     * 9 thirdpel positions<br>
197
-     * *pixels_tab[xthirdpel + 4 * ythirdpel]
198
-     * @param block destination where the result is stored
199
-     * @param pixels source
200
-     * @param line_size number of bytes in a horizontal line of block
201
-     * @param h height
202
-     */
203
-    tpel_mc_func put_tpel_pixels_tab[11]; // FIXME individual func ptr per width?
204
-    tpel_mc_func avg_tpel_pixels_tab[11]; // FIXME individual func ptr per width?
205
-
206 193
     qpel_mc_func put_qpel_pixels_tab[2][16];
207 194
     qpel_mc_func avg_qpel_pixels_tab[2][16];
208 195
     qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
... ...
@@ -24,6 +24,7 @@
24 24
 
25 25
 #include "bit_depth_template.c"
26 26
 #include "hpel_template.c"
27
+#include "tpel_template.c"
27 28
 
28 29
 static inline void FUNC(copy_block2)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
29 30
 {
... ...
@@ -22,47 +22,6 @@
22 22
 #include "pixels.h"
23 23
 
24 24
 #define DEF_HPEL(OPNAME, OP)                                            \
25
-static inline void FUNCC(OPNAME ## _pixels2)(uint8_t *block,            \
26
-                                             const uint8_t *pixels,     \
27
-                                             ptrdiff_t line_size,       \
28
-                                             int h)                     \
29
-{                                                                       \
30
-    int i;                                                              \
31
-    for (i = 0; i < h; i++) {                                           \
32
-        OP(*((pixel2 *) block), AV_RN2P(pixels));                       \
33
-        pixels += line_size;                                            \
34
-        block  += line_size;                                            \
35
-    }                                                                   \
36
-}                                                                       \
37
-                                                                        \
38
-static inline void FUNCC(OPNAME ## _pixels4)(uint8_t *block,            \
39
-                                             const uint8_t *pixels,     \
40
-                                             ptrdiff_t line_size,       \
41
-                                             int h)                     \
42
-{                                                                       \
43
-    int i;                                                              \
44
-    for (i = 0; i < h; i++) {                                           \
45
-        OP(*((pixel4 *) block), AV_RN4P(pixels));                       \
46
-        pixels += line_size;                                            \
47
-        block  += line_size;                                            \
48
-    }                                                                   \
49
-}                                                                       \
50
-                                                                        \
51
-static inline void FUNCC(OPNAME ## _pixels8)(uint8_t *block,            \
52
-                                             const uint8_t *pixels,     \
53
-                                             ptrdiff_t line_size,       \
54
-                                             int h)                     \
55
-{                                                                       \
56
-    int i;                                                              \
57
-    for (i = 0; i < h; i++) {                                           \
58
-        OP(*((pixel4 *) block), AV_RN4P(pixels));                       \
59
-        OP(*((pixel4 *) (block + 4 * sizeof(pixel))),                   \
60
-           AV_RN4P(pixels + 4 * sizeof(pixel)));                        \
61
-        pixels += line_size;                                            \
62
-        block  += line_size;                                            \
63
-    }                                                                   \
64
-}                                                                       \
65
-                                                                        \
66 25
 static inline void FUNC(OPNAME ## _pixels8_l2)(uint8_t *dst,            \
67 26
                                                const uint8_t *src1,     \
68 27
                                                const uint8_t *src2,     \
... ...
@@ -134,10 +93,6 @@ static inline void FUNC(OPNAME ## _pixels16_l2)(uint8_t *dst,           \
134 134
                                 dst_stride, src_stride1,                \
135 135
                                 src_stride2, h);                        \
136 136
 }                                                                       \
137
-                                                                        \
138
-CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16),                              \
139
-               FUNCC(OPNAME ## _pixels8),                               \
140
-               8 * sizeof(pixel))
141 137
 
142 138
 #define op_avg(a, b) a = rnd_avg_pixel4(a, b)
143 139
 #define op_put(a, b) a = b
... ...
@@ -33,6 +33,7 @@
33 33
 #include "bit_depth_template.c"
34 34
 
35 35
 #include "hpel_template.c"
36
+#include "tpel_template.c"
36 37
 
37 38
 #define PIXOP2(OPNAME, OP)                                              \
38 39
 static inline void FUNC(OPNAME ## _no_rnd_pixels8_l2)(uint8_t *dst,     \
... ...
@@ -54,6 +54,7 @@
54 54
 #include "golomb.h"
55 55
 #include "hpeldsp.h"
56 56
 #include "rectangle.h"
57
+#include "tpeldsp.h"
57 58
 #include "vdpau_internal.h"
58 59
 
59 60
 #if CONFIG_ZLIB
... ...
@@ -71,6 +72,7 @@
71 71
 typedef struct {
72 72
     H264Context h;
73 73
     HpelDSPContext hdsp;
74
+    TpelDSPContext tdsp;
74 75
     H264Picture *cur_pic;
75 76
     H264Picture *next_pic;
76 77
     H264Picture *last_pic;
... ...
@@ -328,9 +330,9 @@ static inline void svq3_mc_dir_part(SVQ3Context *s,
328 328
         src = h->edge_emu_buffer;
329 329
     }
330 330
     if (thirdpel)
331
-        (avg ? h->dsp.avg_tpel_pixels_tab
332
-             : h->dsp.put_tpel_pixels_tab)[dxy](dest, src, h->linesize,
333
-                                                width, height);
331
+        (avg ? s->tdsp.avg_tpel_pixels_tab
332
+             : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src, h->linesize,
333
+                                                 width, height);
334 334
     else
335 335
         (avg ? s->hdsp.avg_pixels_tab
336 336
              : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, h->linesize,
... ...
@@ -356,10 +358,10 @@ static inline void svq3_mc_dir_part(SVQ3Context *s,
356 356
                 src = h->edge_emu_buffer;
357 357
             }
358 358
             if (thirdpel)
359
-                (avg ? h->dsp.avg_tpel_pixels_tab
360
-                     : h->dsp.put_tpel_pixels_tab)[dxy](dest, src,
361
-                                                        h->uvlinesize,
362
-                                                        width, height);
359
+                (avg ? s->tdsp.avg_tpel_pixels_tab
360
+                     : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src,
361
+                                                         h->uvlinesize,
362
+                                                         width, height);
363 363
             else
364 364
                 (avg ? s->hdsp.avg_pixels_tab
365 365
                      : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src,
... ...
@@ -887,6 +889,8 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx)
887 887
         goto fail;
888 888
 
889 889
     ff_hpeldsp_init(&s->hdsp, avctx->flags);
890
+    ff_tpeldsp_init(&s->tdsp);
891
+
890 892
     h->flags           = avctx->flags;
891 893
     h->is_complex      = 1;
892 894
     h->sps.chroma_format_idc = 1;
893 895
new file mode 100644
... ...
@@ -0,0 +1,80 @@
0
+/*
1
+ * This file is part of FFmpeg.
2
+ *
3
+ * FFmpeg is free software; you can redistribute it and/or
4
+ * modify it under the terms of the GNU Lesser General Public
5
+ * License as published by the Free Software Foundation; either
6
+ * version 2.1 of the License, or (at your option) any later version.
7
+ *
8
+ * FFmpeg is distributed in the hope that it will be useful,
9
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
+ * Lesser General Public License for more details.
12
+ *
13
+ * You should have received a copy of the GNU Lesser General Public
14
+ * License along with FFmpeg; if not, write to the Free Software
15
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+ */
17
+
18
+#include <stddef.h>
19
+#include <stdint.h>
20
+
21
+#include "libavutil/intreadwrite.h"
22
+#include "pixels.h"
23
+#include "rnd_avg.h"
24
+
25
+#include "bit_depth_template.c"
26
+
27
+#define DEF_TPEL(OPNAME, OP)                                            \
28
+static inline void FUNCC(OPNAME ## _pixels2)(uint8_t *block,            \
29
+                                             const uint8_t *pixels,     \
30
+                                             ptrdiff_t line_size,       \
31
+                                             int h)                     \
32
+{                                                                       \
33
+    int i;                                                              \
34
+    for (i = 0; i < h; i++) {                                           \
35
+        OP(*((pixel2 *) block), AV_RN2P(pixels));                       \
36
+        pixels += line_size;                                            \
37
+        block  += line_size;                                            \
38
+    }                                                                   \
39
+}                                                                       \
40
+                                                                        \
41
+static inline void FUNCC(OPNAME ## _pixels4)(uint8_t *block,            \
42
+                                             const uint8_t *pixels,     \
43
+                                             ptrdiff_t line_size,       \
44
+                                             int h)                     \
45
+{                                                                       \
46
+    int i;                                                              \
47
+    for (i = 0; i < h; i++) {                                           \
48
+        OP(*((pixel4 *) block), AV_RN4P(pixels));                       \
49
+        pixels += line_size;                                            \
50
+        block  += line_size;                                            \
51
+    }                                                                   \
52
+}                                                                       \
53
+                                                                        \
54
+static inline void FUNCC(OPNAME ## _pixels8)(uint8_t *block,            \
55
+                                             const uint8_t *pixels,     \
56
+                                             ptrdiff_t line_size,       \
57
+                                             int h)                     \
58
+{                                                                       \
59
+    int i;                                                              \
60
+    for (i = 0; i < h; i++) {                                           \
61
+        OP(*((pixel4 *) block), AV_RN4P(pixels));                       \
62
+        OP(*((pixel4 *) (block + 4 * sizeof(pixel))),                   \
63
+           AV_RN4P(pixels + 4 * sizeof(pixel)));                        \
64
+        pixels += line_size;                                            \
65
+        block  += line_size;                                            \
66
+    }                                                                   \
67
+}                                                                       \
68
+                                                                        \
69
+CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16),                              \
70
+               FUNCC(OPNAME ## _pixels8),                               \
71
+               8 * sizeof(pixel))
72
+
73
+#define op_avg(a, b) a = rnd_avg_pixel4(a, b)
74
+#define op_put(a, b) a = b
75
+
76
+DEF_TPEL(avg, op_avg)
77
+DEF_TPEL(put, op_put)
78
+#undef op_avg
79
+#undef op_put
0 80
new file mode 100644
... ...
@@ -0,0 +1,333 @@
0
+/*
1
+ * thirdpel DSP functions
2
+ *
3
+ * This file is part of FFmpeg.
4
+ *
5
+ * FFmpeg is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * FFmpeg is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with FFmpeg; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+/**
21
+ * @file
22
+ * thirdpel DSP functions
23
+ */
24
+
25
+#include <stdint.h>
26
+
27
+#include "libavutil/attributes.h"
28
+#include "tpeldsp.h"
29
+
30
+#define BIT_DEPTH 8
31
+#include "tpel_template.c"
32
+
33
/**
 * "put" function for the (0, 0) thirdpel position.
 *
 * No interpolation is done here: the call is forwarded to the fixed-width
 * put_pixels{2,4,8,16}_8_c helper that matches @p width. Any other width is
 * ignored and dst is left untouched.
 *
 * @param dst    destination block
 * @param src    source pixels
 * @param stride forwarded to the helper as its line size
 * @param width  block width; only 2, 4, 8 and 16 are dispatched
 * @param height number of rows, forwarded to the helper
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    if (width == 2)
        put_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        put_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        put_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        put_pixels16_8_c(dst, src, stride, height);
}
51
+
52
/**
 * "put" function blending each sample with its right-hand neighbour,
 * weighted 2:1 in favour of the current sample.
 *
 * Every output is (2 * src[x] + src[x + 1] + 1) * 683 >> 11; 683 / 2048 is
 * the fixed-point stand-in for dividing the weighted sum by 3.
 *
 * @param dst    destination block, advanced by stride per row
 * @param src    source pixels; one column beyond width is read on each row
 * @param stride distance between rows, applied to both dst and src
 * @param width  number of columns written per row
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int weighted = 2 * src[col] + src[col + 1] + 1;

            dst[col] = (weighted * 683) >> 11;
        }
    }
}
65
+
66
/**
 * "put" function blending each sample with its right-hand neighbour,
 * weighted 1:2 in favour of the neighbour.
 *
 * Every output is (src[x] + 2 * src[x + 1] + 1) * 683 >> 11; 683 / 2048 is
 * the fixed-point stand-in for dividing the weighted sum by 3.
 *
 * @param dst    destination block, advanced by stride per row
 * @param src    source pixels; one column beyond width is read on each row
 * @param stride distance between rows, applied to both dst and src
 * @param width  number of columns written per row
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int weighted = src[col] + 2 * src[col + 1] + 1;

            dst[col] = (weighted * 683) >> 11;
        }
    }
}
79
+
80
/**
 * "put" function blending each sample with the one directly below it,
 * weighted 2:1 in favour of the current sample.
 *
 * Every output is (2 * src[x] + src[x + stride] + 1) * 683 >> 11;
 * 683 / 2048 is the fixed-point stand-in for dividing by 3.
 *
 * @param dst    destination block, advanced by stride per row
 * @param src    source pixels; one row beyond height is read
 * @param stride distance between rows, applied to both dst and src
 * @param width  number of columns written per row
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int weighted = 2 * src[col] + below[col] + 1;

            dst[col] = (weighted * 683) >> 11;
        }
    }
}
93
+
94
/**
 * "put" function blending a 2x2 neighbourhood with weights
 *     4 3
 *     3 2
 * (current sample top-left, next row underneath).
 *
 * The weighted sum plus 6 is scaled by 2731 >> 15, the fixed-point
 * stand-in for dividing by the weight total of 12.
 *
 * @param dst    destination block, advanced by stride per row
 * @param src    source pixels; one extra column and one extra row are read
 * @param stride distance between rows, applied to both dst and src
 * @param width  number of columns written per row
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int weighted = 4 * src[col]   + 3 * src[col + 1] +
                                 3 * below[col] + 2 * below[col + 1] + 6;

            dst[col] = (weighted * 2731) >> 15;
        }
    }
}
108
+
109
/**
 * "put" function blending a 2x2 neighbourhood with weights
 *     3 2
 *     4 3
 * (current sample top-left, next row underneath).
 *
 * The weighted sum plus 6 is scaled by 2731 >> 15, the fixed-point
 * stand-in for dividing by the weight total of 12.
 *
 * @param dst    destination block, advanced by stride per row
 * @param src    source pixels; one extra column and one extra row are read
 * @param stride distance between rows, applied to both dst and src
 * @param width  number of columns written per row
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int weighted = 3 * src[col]   + 2 * src[col + 1] +
                                 4 * below[col] + 3 * below[col + 1] + 6;

            dst[col] = (weighted * 2731) >> 15;
        }
    }
}
123
+
124
/**
 * "put" function blending each sample with the one directly below it,
 * weighted 1:2 in favour of the lower sample.
 *
 * Every output is (src[x] + 2 * src[x + stride] + 1) * 683 >> 11;
 * 683 / 2048 is the fixed-point stand-in for dividing by 3.
 *
 * @param dst    destination block, advanced by stride per row
 * @param src    source pixels; one row beyond height is read
 * @param stride distance between rows, applied to both dst and src
 * @param width  number of columns written per row
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int weighted = src[col] + 2 * below[col] + 1;

            dst[col] = (weighted * 683) >> 11;
        }
    }
}
137
+
138
/**
 * "put" function blending a 2x2 neighbourhood with weights
 *     3 4
 *     2 3
 * (current sample top-left, next row underneath).
 *
 * The weighted sum plus 6 is scaled by 2731 >> 15, the fixed-point
 * stand-in for dividing by the weight total of 12.
 *
 * @param dst    destination block, advanced by stride per row
 * @param src    source pixels; one extra column and one extra row are read
 * @param stride distance between rows, applied to both dst and src
 * @param width  number of columns written per row
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int weighted = 3 * src[col]   + 4 * src[col + 1] +
                                 2 * below[col] + 3 * below[col + 1] + 6;

            dst[col] = (weighted * 2731) >> 15;
        }
    }
}
152
+
153
/**
 * "put" function blending a 2x2 neighbourhood with weights
 *     2 3
 *     3 4
 * (current sample top-left, next row underneath).
 *
 * The weighted sum plus 6 is scaled by 2731 >> 15, the fixed-point
 * stand-in for dividing by the weight total of 12.
 *
 * @param dst    destination block, advanced by stride per row
 * @param src    source pixels; one extra column and one extra row are read
 * @param stride distance between rows, applied to both dst and src
 * @param width  number of columns written per row
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int weighted = 2 * src[col]   + 3 * src[col + 1] +
                                 3 * below[col] + 4 * below[col + 1] + 6;

            dst[col] = (weighted * 2731) >> 15;
        }
    }
}
167
+
168
/**
 * "avg" function for the (0, 0) thirdpel position.
 *
 * No interpolation is done here: the call is forwarded to the fixed-width
 * avg_pixels{2,4,8,16}_8_c helper that matches @p width. Any other width is
 * ignored and dst is left untouched.
 *
 * @param dst    destination block
 * @param src    source pixels
 * @param stride forwarded to the helper as its line size
 * @param width  block width; only 2, 4, 8 and 16 are dispatched
 * @param height number of rows, forwarded to the helper
 */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    if (width == 2)
        avg_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_8_c(dst, src, stride, height);
}
186
+
187
/**
 * "avg" function blending each sample with its right-hand neighbour
 * (weights 2:1 in favour of the current sample), then averaging the
 * result into the destination.
 *
 * The interpolated value is (2 * src[x] + src[x + 1] + 1) * 683 >> 11
 * (683 / 2048 stands in for a division by 3); the stored value is
 * (dst[x] + interpolated + 1) >> 1.
 *
 * @param dst    destination block, read and written; advanced by stride
 * @param src    source pixels; one column beyond width is read on each row
 * @param stride distance between rows, applied to both dst and src
 * @param width  number of columns written per row
 * @param height number of rows
 */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = ((2 * src[col] + src[col + 1] + 1) * 683) >> 11;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
201
+
202
/**
 * "avg" function blending each sample with its right-hand neighbour
 * (weights 1:2 in favour of the neighbour), then averaging the result
 * into the destination.
 *
 * The interpolated value is (src[x] + 2 * src[x + 1] + 1) * 683 >> 11
 * (683 / 2048 stands in for a division by 3); the stored value is
 * (dst[x] + interpolated + 1) >> 1.
 *
 * @param dst    destination block, read and written; advanced by stride
 * @param src    source pixels; one column beyond width is read on each row
 * @param stride distance between rows, applied to both dst and src
 * @param width  number of columns written per row
 * @param height number of rows
 */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = ((src[col] + 2 * src[col + 1] + 1) * 683) >> 11;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
216
+
217
/**
 * "avg" function blending each sample with the one directly below it
 * (weights 2:1 in favour of the current sample), then averaging the
 * result into the destination.
 *
 * The interpolated value is (2 * src[x] + src[x + stride] + 1) * 683 >> 11
 * (683 / 2048 stands in for a division by 3); the stored value is
 * (dst[x] + interpolated + 1) >> 1.
 *
 * @param dst    destination block, read and written; advanced by stride
 * @param src    source pixels; one row beyond height is read
 * @param stride distance between rows, applied to both dst and src
 * @param width  number of columns written per row
 * @param height number of rows
 */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int interp = ((2 * src[col] + below[col] + 1) * 683) >> 11;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
231
+
232
/**
 * "avg" function blending a 2x2 neighbourhood with weights
 *     4 3
 *     3 2
 * (current sample top-left, next row underneath), then averaging the
 * result into the destination.
 *
 * The weighted sum plus 6 is scaled by 2731 >> 15 (stand-in for dividing
 * by the weight total of 12); the stored value is
 * (dst[x] + interpolated + 1) >> 1.
 *
 * @param dst    destination block, read and written; advanced by stride
 * @param src    source pixels; one extra column and one extra row are read
 * @param stride distance between rows, applied to both dst and src
 * @param width  number of columns written per row
 * @param height number of rows
 */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int weighted = 4 * src[col]   + 3 * src[col + 1] +
                                 3 * below[col] + 2 * below[col + 1] + 6;
            const int interp   = (weighted * 2731) >> 15;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
247
+
248
/**
 * "avg" function blending a 2x2 neighbourhood with weights
 *     3 2
 *     4 3
 * (current sample top-left, next row underneath), then averaging the
 * result into the destination.
 *
 * The weighted sum plus 6 is scaled by 2731 >> 15 (stand-in for dividing
 * by the weight total of 12); the stored value is
 * (dst[x] + interpolated + 1) >> 1.
 *
 * @param dst    destination block, read and written; advanced by stride
 * @param src    source pixels; one extra column and one extra row are read
 * @param stride distance between rows, applied to both dst and src
 * @param width  number of columns written per row
 * @param height number of rows
 */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int weighted = 3 * src[col]   + 2 * src[col + 1] +
                                 4 * below[col] + 3 * below[col + 1] + 6;
            const int interp   = (weighted * 2731) >> 15;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
263
+
264
/**
 * "avg" function blending each sample with the one directly below it
 * (weights 1:2 in favour of the lower sample), then averaging the result
 * into the destination.
 *
 * The interpolated value is (src[x] + 2 * src[x + stride] + 1) * 683 >> 11
 * (683 / 2048 stands in for a division by 3); the stored value is
 * (dst[x] + interpolated + 1) >> 1.
 *
 * @param dst    destination block, read and written; advanced by stride
 * @param src    source pixels; one row beyond height is read
 * @param stride distance between rows, applied to both dst and src
 * @param width  number of columns written per row
 * @param height number of rows
 */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int interp = ((src[col] + 2 * below[col] + 1) * 683) >> 11;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
278
+
279
/**
 * "avg" function blending a 2x2 neighbourhood with weights
 *     3 4
 *     2 3
 * (current sample top-left, next row underneath), then averaging the
 * result into the destination.
 *
 * The weighted sum plus 6 is scaled by 2731 >> 15 (stand-in for dividing
 * by the weight total of 12); the stored value is
 * (dst[x] + interpolated + 1) >> 1.
 *
 * @param dst    destination block, read and written; advanced by stride
 * @param src    source pixels; one extra column and one extra row are read
 * @param stride distance between rows, applied to both dst and src
 * @param width  number of columns written per row
 * @param height number of rows
 */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int weighted = 3 * src[col]   + 4 * src[col + 1] +
                                 2 * below[col] + 3 * below[col + 1] + 6;
            const int interp   = (weighted * 2731) >> 15;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
294
+
295
/**
 * "avg" function blending a 2x2 neighbourhood with weights
 *     2 3
 *     3 4
 * (current sample top-left, next row underneath), then averaging the
 * result into the destination.
 *
 * The weighted sum plus 6 is scaled by 2731 >> 15 (stand-in for dividing
 * by the weight total of 12); the stored value is
 * (dst[x] + interpolated + 1) >> 1.
 *
 * @param dst    destination block, read and written; advanced by stride
 * @param src    source pixels; one extra column and one extra row are read
 * @param stride distance between rows, applied to both dst and src
 * @param width  number of columns written per row
 * @param height number of rows
 */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (col = 0; col < width; col++) {
            const int weighted = 2 * src[col]   + 3 * src[col + 1] +
                                 3 * below[col] + 4 * below[col + 1] + 6;
            const int interp   = (weighted * 2731) >> 15;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
310
+
311
+av_cold void ff_tpeldsp_init(TpelDSPContext *c)
312
+{
313
+    c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
314
+    c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
315
+    c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
316
+    c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
317
+    c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
318
+    c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
319
+    c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
320
+    c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
321
+    c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
322
+
323
+    c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
324
+    c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
325
+    c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
326
+    c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
327
+    c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
328
+    c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
329
+    c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
330
+    c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
331
+    c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
332
+}
0 333
new file mode 100644
... ...
@@ -0,0 +1,59 @@
0
+/*
1
+ * thirdpel DSP functions
2
+ *
3
+ * This file is part of FFmpeg.
4
+ *
5
+ * FFmpeg is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * FFmpeg is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with FFmpeg; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+/**
21
+ * @file
22
+ * thirdpel DSP functions
23
+ */
24
+
25
+#ifndef AVCODEC_TPELDSP_H
26
+#define AVCODEC_TPELDSP_H
27
+
28
+#include <stdint.h>
29
+
30
/**
 * Thirdpel motion compensation function: add and put pixel (decoding).
 *
 * NOTE(review): the comment this replaces was worded for hpel_pixels_func,
 * a type that is not declared in this header. It stated that block sizes
 * are 8x4, 8x8, 16x8 and 16x16, and that h is limited to {width/2, width}
 * but never larger than 16 and never smaller than 4. Confirm whether those
 * limits also apply to tpel_mc_func before relying on them.
 *
 * @param block     destination; originally annotated "align width (8 or 16)"
 *                  -- TODO confirm the alignment callers actually guarantee
 * @param pixels    source; originally annotated "align 1"
 * @param line_size distance between two consecutive rows
 * @param w         block width
 * @param h         block height
 */
typedef void (*tpel_mc_func)(uint8_t *block,
                             const uint8_t *pixels,
                             int line_size, int w, int h);
37
+
38
+/**
39
+ * thirdpel DSP context
40
+ */
41
+typedef struct TpelDSPContext {
42
+    /**
43
+     * Thirdpel motion compensation with rounding (a + b + 1) >> 1.
44
+     * this is an array[12] of motion compensation functions for the
45
+     * 9 thirdpel positions<br>
46
+     * *pixels_tab[xthirdpel + 4 * ythirdpel]
47
+     * @param block destination where the result is stored
48
+     * @param pixels source
49
+     * @param line_size number of bytes in a horizontal line of block
50
+     * @param h height
51
+     */
52
+    tpel_mc_func put_tpel_pixels_tab[11]; // FIXME individual func ptr per width?
53
+    tpel_mc_func avg_tpel_pixels_tab[11]; // FIXME individual func ptr per width?
54
+} TpelDSPContext;
55
+
56
+void ff_tpeldsp_init(TpelDSPContext *c);
57
+
58
+#endif /* AVCODEC_TPELDSP_H */