Browse code

huffyuvencdsp: move functions only used by huffyuv from lossless_videodsp

Signed-off-by: James Almer <jamrial@gmail.com>

James Almer authored on 2017/01/08 09:10:46
Showing 14 changed files
... ...
@@ -2430,7 +2430,7 @@ hap_encoder_deps="libsnappy"
2430 2430
 hap_encoder_select="texturedspenc"
2431 2431
 hevc_decoder_select="bswapdsp cabac golomb videodsp"
2432 2432
 huffyuv_decoder_select="bswapdsp huffyuvdsp llviddsp"
2433
-huffyuv_encoder_select="bswapdsp huffman huffyuvencdsp llviddsp"
2433
+huffyuv_encoder_select="bswapdsp huffman huffyuvencdsp"
2434 2434
 iac_decoder_select="imc_decoder"
2435 2435
 imc_decoder_select="bswapdsp fft mdct sinewin"
2436 2436
 indeo3_decoder_select="hpeldsp"
... ...
@@ -76,7 +76,6 @@ av_cold void ff_huffyuv_common_init(AVCodecContext *avctx)
76 76
     s->flags = avctx->flags;
77 77
 
78 78
     ff_bswapdsp_init(&s->bdsp);
79
-    ff_llviddsp_init(&s->llviddsp, avctx);
80 79
 
81 80
     s->width = avctx->width;
82 81
     s->height = avctx->height;
... ...
@@ -298,6 +298,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
298 298
         return ret;
299 299
 
300 300
     ff_huffyuvdsp_init(&s->hdsp);
301
+    ff_llviddsp_init(&s->llviddsp, avctx);
301 302
     memset(s->vlc, 0, 4 * sizeof(VLC));
302 303
 
303 304
     s->interlaced = avctx->height > 288;
... ...
@@ -43,7 +43,7 @@ static inline void diff_bytes(HYuvContext *s, uint8_t *dst,
43 43
     if (s->bps <= 8) {
44 44
         s->hencdsp.diff_bytes(dst, src0, src1, w);
45 45
     } else {
46
-        s->llviddsp.diff_int16((uint16_t *)dst, (const uint16_t *)src0, (const uint16_t *)src1, s->n - 1, w);
46
+        s->hencdsp.diff_int16((uint16_t *)dst, (const uint16_t *)src0, (const uint16_t *)src1, s->n - 1, w);
47 47
     }
48 48
 }
49 49
 
... ...
@@ -84,7 +84,7 @@ static inline int sub_left_prediction(HYuvContext *s, uint8_t *dst,
84 84
                 dst16[i] = temp - left;
85 85
                 left   = temp;
86 86
             }
87
-            s->llviddsp.diff_int16(dst16 + 16, src16 + 16, src16 + 15, s->n - 1, w - 16);
87
+            s->hencdsp.diff_int16(dst16 + 16, src16 + 16, src16 + 15, s->n - 1, w - 16);
88 88
             return src16[w-1];
89 89
         }
90 90
     }
... ...
@@ -158,7 +158,7 @@ static void sub_median_prediction(HYuvContext *s, uint8_t *dst, const uint8_t *s
158 158
     if (s->bps <= 8) {
159 159
         s->hencdsp.sub_hfyu_median_pred(dst, src1, src2, w , left, left_top);
160 160
     } else {
161
-        s->llviddsp.sub_hfyu_median_pred_int16((uint16_t *)dst, (const uint16_t *)src1, (const uint16_t *)src2, s->n - 1, w , left, left_top);
161
+        s->hencdsp.sub_hfyu_median_pred_int16((uint16_t *)dst, (const uint16_t *)src1, (const uint16_t *)src2, s->n - 1, w , left, left_top);
162 162
     }
163 163
 }
164 164
 
... ...
@@ -217,7 +217,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
217 217
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
218 218
 
219 219
     ff_huffyuv_common_init(avctx);
220
-    ff_huffyuvencdsp_init(&s->hencdsp);
220
+    ff_huffyuvencdsp_init(&s->hencdsp, avctx);
221 221
 
222 222
     avctx->extradata = av_mallocz(3*MAX_N + 4);
223 223
     if (s->flags&AV_CODEC_FLAG_PASS1) {
... ...
@@ -53,6 +53,32 @@ static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
53 53
         dst[i + 0] = src1[i + 0] - src2[i + 0];
54 54
 }
55 55
 
56
/*
 * Masked element-wise difference of two 16-bit arrays:
 *     dst[i] = (src1[i] - src2[i]) & mask    for 0 <= i < w
 *
 * dst  - output, w elements
 * src1 - minuend
 * src2 - subtrahend
 * mask - AND-ed onto every difference (the huffyuv encoder hunks of this
 *        commit pass s->n - 1)
 * w    - number of 16-bit elements
 *
 * A bulk loop runs first, then a scalar tail loop finishes the remainder:
 *  - when built with HAVE_FAST_UNALIGNED == 0 and src2 is not aligned to
 *    sizeof(long), the bulk loop is a 4x unrolled per-element loop;
 *  - otherwise sizeof(long)/2 elements are handled per iteration through
 *    long-sized loads and stores.
 * NOTE(review): the long-sized path looks like it assumes mask == 2^n - 1
 * and that src1/dst tolerate long-sized accesses - confirm against callers.
 */
+static void diff_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w){
57
+    long i; /* shared by all loops so the tail loop resumes where the bulk loop stopped */
58
+#if !HAVE_FAST_UNALIGNED
59
+    if((long)src2 & (sizeof(long)-1)){ /* src2 not long-aligned: use 16-bit accesses only */
60
+        for(i=0; i+3<w; i+=4){ /* four elements per iteration */
61
+            dst[i+0] = (src1[i+0]-src2[i+0]) & mask;
62
+            dst[i+1] = (src1[i+1]-src2[i+1]) & mask;
63
+            dst[i+2] = (src1[i+2]-src2[i+2]) & mask;
64
+            dst[i+3] = (src1[i+3]-src2[i+3]) & mask;
65
+        }
66
+    }else
67
+#endif
68
+    {
69
+        unsigned long pw_lsb = (mask >> 1) * 0x0001000100010001ULL; /* mask>>1 copied into each 16-bit lane of a long */
70
+        unsigned long pw_msb = pw_lsb +  0x0001000100010001ULL; /* pw_lsb + 1 in each lane; the mask's top bit if mask is 2^n - 1 */
71
+
72
+        for (i = 0; i <= w - (int)sizeof(long)/2; i += sizeof(long)/2) { /* sizeof(long)/2 elements per step */
73
+            long a = *(long*)(src1+i);
74
+            long b = *(long*)(src2+i);
75
+            *(long*)(dst+i) = ((a|pw_msb) - (b&pw_lsb)) ^ ((a^b^pw_msb)&pw_msb); /* subtract with the pw_msb bit forced on in a and off in b, then fix that bit up by XOR */
76
+        }
77
+    }
78
+    for (; i<w; i++) /* scalar tail: whatever the bulk loop left over */
79
+        dst[i] = (src1[i] - src2[i]) & mask;
80
+}
81
+
56 82
 static void sub_hfyu_median_pred_c(uint8_t *dst, const uint8_t *src1,
57 83
                                    const uint8_t *src2, intptr_t w,
58 84
                                    int *left, int *left_top)
... ...
@@ -74,11 +100,31 @@ static void sub_hfyu_median_pred_c(uint8_t *dst, const uint8_t *src1,
74 74
     *left_top = lt;
75 75
 }
76 76
 
77
-av_cold void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c)
77
/*
 * 16-bit median-prediction residual.
 *
 * For each i in [0, w):
 *     pred   = mid_pred(l, src1[i], (l + src1[i] - lt) & mask)
 *     dst[i] = (src2[i] - pred) & mask
 * where l is the previous src2 element and lt the previous src1 element;
 * for i == 0 they come from *left and *left_top.
 *
 * dst      - output residuals, w elements
 * src1     - first input row (supplies the middle predictor candidate and lt)
 * src2     - second input row (the values being predicted; supplies l)
 * mask     - AND-ed onto the gradient candidate and onto each residual
 * w        - number of 16-bit elements
 * left     - in: initial l;  out: last src2 value consumed
 * left_top - in: initial lt; out: last src1 value consumed
 *
 * mid_pred() is defined outside this view; presumably it returns the
 * median of its three arguments - confirm.
 */
+static void sub_hfyu_median_pred_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w, int *left, int *left_top){
78
+    int i;
79
+    uint16_t l, lt; /* running left / left-top neighbours, truncated to 16 bits */
80
+
81
+    l  = *left;
82
+    lt = *left_top;
83
+
84
+    for(i=0; i<w; i++){
85
+        const int pred = mid_pred(l, src1[i], (l + src1[i] - lt) & mask); /* candidates: l, src1[i], masked gradient */
86
+        lt = src1[i]; /* becomes the left-top neighbour of element i+1 */
87
+        l  = src2[i]; /* becomes the left neighbour of element i+1 */
88
+        dst[i] = (l - pred) & mask; /* l already holds src2[i] here */
89
+    }
90
+
91
+    *left     = l; /* hand the running state back to the caller */
92
+    *left_top = lt;
93
+}
94
+
95
/*
 * Fill a HuffYUVEncDSPContext with the C implementations of all four
 * function pointers, then call the x86 initialiser when ARCH_X86 is set.
 *
 * c     - context to initialise
 * avctx - codec context; not read here, only forwarded to
 *         ff_huffyuvencdsp_init_x86()
 */
+av_cold void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c, AVCodecContext *avctx)
78 96
 {
79 97
     c->diff_bytes           = diff_bytes_c;
98
+    c->diff_int16           = diff_int16_c; /* 16-bit counterpart of diff_bytes */
80 99
     c->sub_hfyu_median_pred = sub_hfyu_median_pred_c;
100
+    c->sub_hfyu_median_pred_int16 = sub_hfyu_median_pred_int16_c; /* 16-bit counterpart of sub_hfyu_median_pred */
81 101
 
82 102
     if (ARCH_X86)
83
-        ff_huffyuvencdsp_init_x86(c);
103
+        ff_huffyuvencdsp_init_x86(c, avctx); /* the x86 init reads avctx->pix_fmt */
84 104
 }
... ...
@@ -21,11 +21,18 @@
21 21
 
22 22
 #include <stdint.h>
23 23
 
24
+#include "avcodec.h"
25
+
24 26
 typedef struct HuffYUVEncDSPContext {
25 27
     void (*diff_bytes)(uint8_t *dst /* align 16 */,
26 28
                        const uint8_t *src1 /* align 16 */,
27 29
                        const uint8_t *src2 /* align 1 */,
28 30
                        intptr_t w);
31
+    void (*diff_int16)(uint16_t *dst /* align 16 */,
32
+                       const uint16_t *src1 /* align 16 */,
33
+                       const uint16_t *src2 /* align 1 */,
34
+                       unsigned mask, int w);
35
+
29 36
     /**
30 37
      * Subtract HuffYUV's variant of median prediction.
31 38
      * Note, this might read from src1[-1], src2[-1].
... ...
@@ -33,9 +40,12 @@ typedef struct HuffYUVEncDSPContext {
33 33
     void (*sub_hfyu_median_pred)(uint8_t *dst, const uint8_t *src1,
34 34
                                  const uint8_t *src2, intptr_t w,
35 35
                                  int *left, int *left_top);
36
+    void (*sub_hfyu_median_pred_int16)(uint16_t *dst, const uint16_t *src1,
37
+                                       const uint16_t *src2, unsigned mask,
38
+                                       int w, int *left, int *left_top);
36 39
 } HuffYUVEncDSPContext;
37 40
 
38
-void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c);
39
-void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c);
41
+void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c, AVCodecContext *avctx);
42
+void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c, AVCodecContext *avctx);
40 43
 
41 44
 #endif /* AVCODEC_HUFFYUVENCDSP_H */
... ...
@@ -92,32 +92,6 @@ static void add_int16_c(uint16_t *dst, const uint16_t *src, unsigned mask, int w
92 92
         dst[i] = (dst[i] + src[i]) & mask;
93 93
 }
94 94
 
95
-static void diff_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w){
96
-    long i;
97
-#if !HAVE_FAST_UNALIGNED
98
-    if((long)src2 & (sizeof(long)-1)){
99
-        for(i=0; i+3<w; i+=4){
100
-            dst[i+0] = (src1[i+0]-src2[i+0]) & mask;
101
-            dst[i+1] = (src1[i+1]-src2[i+1]) & mask;
102
-            dst[i+2] = (src1[i+2]-src2[i+2]) & mask;
103
-            dst[i+3] = (src1[i+3]-src2[i+3]) & mask;
104
-        }
105
-    }else
106
-#endif
107
-    {
108
-        unsigned long pw_lsb = (mask >> 1) * 0x0001000100010001ULL;
109
-        unsigned long pw_msb = pw_lsb +  0x0001000100010001ULL;
110
-
111
-        for (i = 0; i <= w - (int)sizeof(long)/2; i += sizeof(long)/2) {
112
-            long a = *(long*)(src1+i);
113
-            long b = *(long*)(src2+i);
114
-            *(long*)(dst+i) = ((a|pw_msb) - (b&pw_lsb)) ^ ((a^b^pw_msb)&pw_msb);
115
-        }
116
-    }
117
-    for (; i<w; i++)
118
-        dst[i] = (src1[i] - src2[i]) & mask;
119
-}
120
-
121 95
 static void add_hfyu_median_pred_int16_c(uint16_t *dst, const uint16_t *src, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top){
122 96
     int i;
123 97
     uint16_t l, lt;
... ...
@@ -135,24 +109,6 @@ static void add_hfyu_median_pred_int16_c(uint16_t *dst, const uint16_t *src, con
135 135
     *left_top = lt;
136 136
 }
137 137
 
138
-static void sub_hfyu_median_pred_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w, int *left, int *left_top){
139
-    int i;
140
-    uint16_t l, lt;
141
-
142
-    l  = *left;
143
-    lt = *left_top;
144
-
145
-    for(i=0; i<w; i++){
146
-        const int pred = mid_pred(l, src1[i], (l + src1[i] - lt) & mask);
147
-        lt = src1[i];
148
-        l  = src2[i];
149
-        dst[i] = (l - pred) & mask;
150
-    }
151
-
152
-    *left     = l;
153
-    *left_top = lt;
154
-}
155
-
156 138
 static int add_hfyu_left_pred_int16_c(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned acc){
157 139
     int i;
158 140
 
... ...
@@ -180,10 +136,8 @@ void ff_llviddsp_init(LLVidDSPContext *c, AVCodecContext *avctx)
180 180
     c->add_left_pred              = add_left_pred_c;
181 181
 
182 182
     c->add_int16 = add_int16_c;
183
-    c->diff_int16= diff_int16_c;
184 183
     c->add_hfyu_left_pred_int16   = add_hfyu_left_pred_int16_c;
185 184
     c->add_hfyu_median_pred_int16 = add_hfyu_median_pred_int16_c;
186
-    c->sub_hfyu_median_pred_int16 = sub_hfyu_median_pred_int16_c;
187 185
 
188 186
     if (ARCH_X86)
189 187
         ff_llviddsp_init_x86(c, avctx);
... ...
@@ -35,9 +35,7 @@ typedef struct LLVidDSPContext {
35 35
                          intptr_t w, int left);
36 36
 
37 37
     void (*add_int16)(uint16_t *dst/*align 16*/, const uint16_t *src/*align 16*/, unsigned mask, int w);
38
-    void (*diff_int16)(uint16_t *dst/*align 16*/, const uint16_t *src1/*align 16*/, const uint16_t *src2/*align 1*/, unsigned mask, int w);
39 38
 
40
-    void (*sub_hfyu_median_pred_int16)(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w, int *left, int *left_top);
41 39
     void (*add_hfyu_median_pred_int16)(uint16_t *dst, const uint16_t *top, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top);
42 40
     int  (*add_hfyu_left_pred_int16)(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned left);
43 41
 } LLVidDSPContext;
... ...
@@ -1015,7 +1015,7 @@ FF_DISABLE_DEPRECATION_WARNINGS
1015 1015
 FF_ENABLE_DEPRECATION_WARNINGS
1016 1016
 #endif
1017 1017
 
1018
-    ff_huffyuvencdsp_init(&s->hdsp);
1018
+    ff_huffyuvencdsp_init(&s->hdsp, avctx);
1019 1019
 
1020 1020
 #if FF_API_PRIVATE_OPT
1021 1021
 FF_DISABLE_DEPRECATION_WARNINGS
... ...
@@ -120,7 +120,7 @@ static av_cold int utvideo_encode_init(AVCodecContext *avctx)
120 120
     }
121 121
 
122 122
     ff_bswapdsp_init(&c->bdsp);
123
-    ff_huffyuvencdsp_init(&c->hdsp);
123
+    ff_huffyuvencdsp_init(&c->hdsp, avctx);
124 124
 
125 125
 #if FF_API_PRIVATE_OPT
126 126
 FF_DISABLE_DEPRECATION_WARNINGS
... ...
@@ -148,3 +148,116 @@ DIFF_BYTES_PROLOGUE
148 148
     DIFF_BYTES_BODY    u, u
149 149
 %undef i
150 150
 %endif
151
+
152
; INT16_LOOP: body shared by the 16-bit add/diff routines.
;   %1 selects the vector move flavour (mova / movu),
;   %2 selects the operation: "add" gives dst = (src + dst) & mask using
;      srcq/dstq, anything else (used with "sub") gives
;      dst = (src1 - src2) & mask using src1q/src2q/dstq.
; Expects maskd, wd and the pointer registers to be named by the
; enclosing cglobal, plus a scratch register named tmp.
; Flow: a word-at-a-time loop trims elements from the end until the
; remaining byte count is a multiple of 2*mmsize, then a vector loop
; handles 2*mmsize bytes per iteration. The macro ends with RET.
+%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub
153
+    movd    m4, maskd
154
+    SPLATW  m4, m4                  ; SPLATW is defined elsewhere; presumably broadcasts the mask to every word of m4
155
+    add     wd, wd                  ; element count -> byte count
156
+    test    wq, 2*mmsize - 1        ; already a multiple of 2*mmsize bytes?
157
+    jz %%.tomainloop
158
+    push  tmpq                      ; tmp is saved/restored around the scalar loop
159
+%%.wordloop:
160
+    sub     wq, 2                   ; step back one 16-bit element from the end
161
+%ifidn %2, add
162
+    mov   tmpw, [srcq+wq]
163
+    add   tmpw, [dstq+wq]
164
+%else
165
+    mov   tmpw, [src1q+wq]
166
+    sub   tmpw, [src2q+wq]
167
+%endif
168
+    and   tmpw, maskw
169
+    mov     [dstq+wq], tmpw
170
+    test    wq, 2*mmsize - 1        ; repeat until the rest is a multiple of 2*mmsize
171
+    jnz %%.wordloop
172
+    pop   tmpq
173
+%%.tomainloop:
174
+%ifidn %2, add
175
+    add     srcq, wq                ; point past the remaining data ...
176
+%else
177
+    add     src1q, wq
178
+    add     src2q, wq
179
+%endif
180
+    add     dstq, wq
181
+    neg     wq                      ; ... and index it with a negative offset counting up to 0
182
+    jz      %%.end                  ; nothing left for the vector loop
183
+%%.loop:
184
+%ifidn %2, add
185
+    mov%1   m0, [srcq+wq]
186
+    mov%1   m1, [dstq+wq]
187
+    mov%1   m2, [srcq+wq+mmsize]
188
+    mov%1   m3, [dstq+wq+mmsize]
189
+%else
190
+    mov%1   m0, [src1q+wq]
191
+    mov%1   m1, [src2q+wq]
192
+    mov%1   m2, [src1q+wq+mmsize]
193
+    mov%1   m3, [src2q+wq+mmsize]
194
+%endif
195
+    p%2w    m0, m1                  ; paddw / psubw on the first vector
196
+    p%2w    m2, m3                  ; ... and on the second
197
+    pand    m0, m4                  ; apply the mask
198
+    pand    m2, m4
199
+    mov%1   [dstq+wq]       , m0
200
+    mov%1   [dstq+wq+mmsize], m2
201
+    add     wq, 2*mmsize
202
+    jl %%.loop                      ; continue while the offset is still negative
203
+%%.end:
204
+    RET
205
+%endmacro
206
+
207
; MMX version of diff_int16, assembled only when ARCH_X86_32 is set.
; The whole body is INT16_LOOP expanded with the "a" move flavour and the
; "sub" operation, i.e. dst = (src1 - src2) & mask.
+%if ARCH_X86_32
208
+INIT_MMX mmx
209
+cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w, tmp
210
+    INT16_LOOP a, sub
211
+%endif
212
+
213
; SSE2 version of diff_int16: dst = (src1 - src2) & mask over w words.
; Picks the aligned-move expansion of INT16_LOOP only when src1, src2 and
; dst are all multiples of mmsize; otherwise uses the unaligned-move
; expansion. Each expansion ends in its own RET, so the aligned path does
; not fall through into .unaligned.
+INIT_XMM sse2
214
+cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w, tmp
215
+    test src1q, mmsize-1            ; any low address bit set -> not mmsize-aligned
216
+    jnz .unaligned
217
+    test src2q, mmsize-1
218
+    jnz .unaligned
219
+    test dstq, mmsize-1
220
+    jnz .unaligned
221
+    INT16_LOOP a, sub
222
+.unaligned:
223
+    INT16_LOOP u, sub
224
+
225
; MMXEXT version of sub_hfyu_median_pred_int16.
; Per 16-bit element: pred = median(left, top, (left - left_top + top) & mask),
; dst = (src2 - pred) & mask, where top = src1[i], left = src2[i-1],
; left_top = src1[i-1]; the first element takes left/left_top from memory.
; Four words are processed per iteration. The loop advances 8 bytes at a
; time until the byte offset reaches 2*w, so it appears to touch whole
; 8-byte groups even when w is not a multiple of 4 - confirm that callers
; allow this. On exit *left_top / *left receive the last src1 / src2 word.
; pmaxsw/pminsw compare as signed words; the C init code in this commit
; installs this routine only when comp[0].depth < 16.
+INIT_MMX mmxext
226
+cglobal sub_hfyu_median_pred_int16, 7,7,0, dst, src1, src2, mask, w, left, left_top
227
+    add      wd, wd                 ; element count -> byte count
228
+    movd    mm7, maskd
229
+    SPLATW  mm7, mm7                ; SPLATW is defined elsewhere; presumably broadcasts the mask to all four words
230
+    movq    mm0, [src1q]
231
+    movq    mm2, [src2q]
232
+    psllq   mm0, 16                 ; shift up one word to make room for the carried-in neighbour
233
+    psllq   mm2, 16
234
+    movd    mm6, [left_topq]
235
+    por     mm0, mm6                ; mm0 = left_top, src1[0..2]
236
+    movd    mm6, [leftq]
237
+    por     mm2, mm6                ; mm2 = left, src2[0..2]
238
+    xor     maskq, maskq            ; mask now lives in mm7; its register becomes the byte offset
239
+.loop:
240
+    movq    mm1, [src1q + maskq]    ; top
241
+    movq    mm3, [src2q + maskq]    ; values being predicted
242
+    movq    mm4, mm2                ; keep a copy of left
243
+    psubw   mm2, mm0                ; left - left_top
244
+    paddw   mm2, mm1                ; + top
245
+    pand    mm2, mm7                ; masked gradient candidate
246
+    movq    mm5, mm4
247
+    pmaxsw  mm4, mm1                ; max(left, top)
248
+    pminsw  mm1, mm5                ; min(left, top)
249
+    pminsw  mm4, mm2                ; min(max(left, top), gradient)
250
+    pmaxsw  mm4, mm1                ; median of left, top, gradient
251
+    psubw   mm3, mm4                ; residual = src2 - median
252
+    pand    mm3, mm7
253
+    movq    [dstq + maskq], mm3
254
+    add     maskq, 8                ; next group of four words
255
+    movq    mm0, [src1q + maskq - 2] ; left_top for the next group: src1 one word back
256
+    movq    mm2, [src2q + maskq - 2] ; left for the next group: src2 one word back
257
+    cmp     maskq, wq
258
+        jb .loop
259
+    movzx maskd, word [src1q + wq - 2] ; last src1 word of the row
260
+    mov [left_topq], maskd
261
+    movzx maskd, word [src2q + wq - 2] ; last src2 word of the row
262
+    mov [leftq], maskd
263
+    RET
... ...
@@ -24,6 +24,7 @@
24 24
 
25 25
 #include "libavutil/attributes.h"
26 26
 #include "libavutil/cpu.h"
27
+#include "libavutil/pixdesc.h"
27 28
 #include "libavutil/x86/asm.h"
28 29
 #include "libavutil/x86/cpu.h"
29 30
 #include "libavcodec/huffyuvencdsp.h"
... ...
@@ -35,6 +36,12 @@ void ff_diff_bytes_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
35 35
                         intptr_t w);
36 36
 void ff_diff_bytes_avx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
37 37
                         intptr_t w);
38
+void ff_diff_int16_mmx (uint16_t *dst, const uint16_t *src1, const uint16_t *src2,
39
+                        unsigned mask, int w);
40
+void ff_diff_int16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2,
41
+                        unsigned mask, int w);
42
+void ff_sub_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *src1, const uint16_t *src2,
43
+                                          unsigned mask, int w, int *left, int *left_top);
38 44
 
39 45
 #if HAVE_INLINE_ASM
40 46
 
... ...
@@ -80,12 +87,14 @@ static void sub_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *src1,
80 80
 
81 81
 #endif /* HAVE_INLINE_ASM */
82 82
 
83
-av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c)
83
+av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c, AVCodecContext *avctx)
84 84
 {
85 85
     av_unused int cpu_flags = av_get_cpu_flags();
86
+    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(avctx->pix_fmt);
86 87
 
87 88
     if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) {
88 89
         c->diff_bytes = ff_diff_bytes_mmx;
90
+        c->diff_int16 = ff_diff_int16_mmx;
89 91
     }
90 92
 
91 93
 #if HAVE_INLINE_ASM
... ...
@@ -94,8 +103,13 @@ av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c)
94 94
     }
95 95
 #endif /* HAVE_INLINE_ASM */
96 96
 
97
+    if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc && pix_desc->comp[0].depth<16) {
98
+        c->sub_hfyu_median_pred_int16 = ff_sub_hfyu_median_pred_int16_mmxext;
99
+    }
100
+
97 101
     if (EXTERNAL_SSE2(cpu_flags)) {
98 102
         c->diff_bytes = ff_diff_bytes_sse2;
103
+        c->diff_int16 = ff_diff_int16_sse2;
99 104
     }
100 105
 
101 106
     if (EXTERNAL_AVX2_FAST(cpu_flags)) {
... ...
@@ -288,25 +288,6 @@ cglobal add_int16, 4,4,5, dst, src, mask, w, tmp
288 288
 .unaligned:
289 289
     INT16_LOOP u, add
290 290
 
291
-%if ARCH_X86_32
292
-INIT_MMX mmx
293
-cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w, tmp
294
-    INT16_LOOP a, sub
295
-%endif
296
-
297
-INIT_XMM sse2
298
-cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w, tmp
299
-    test src1q, mmsize-1
300
-    jnz .unaligned
301
-    test src2q, mmsize-1
302
-    jnz .unaligned
303
-    test dstq, mmsize-1
304
-    jnz .unaligned
305
-    INT16_LOOP a, sub
306
-.unaligned:
307
-    INT16_LOOP u, sub
308
-
309
-
310 291
 %macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst alignment (a/u), %2 = src alignment (a/u)
311 292
     add     wd, wd
312 293
     add     srcq, wq
... ...
@@ -443,42 +424,3 @@ cglobal add_hfyu_median_pred_int16, 7,7,0, dst, top, diff, mask, w, left, left_t
443 443
     movzx   r2d, word [topq-2]
444 444
     mov [left_topq], r2d
445 445
     RET
446
-
447
-cglobal sub_hfyu_median_pred_int16, 7,7,0, dst, src1, src2, mask, w, left, left_top
448
-    add      wd, wd
449
-    movd    mm7, maskd
450
-    SPLATW  mm7, mm7
451
-    movq    mm0, [src1q]
452
-    movq    mm2, [src2q]
453
-    psllq   mm0, 16
454
-    psllq   mm2, 16
455
-    movd    mm6, [left_topq]
456
-    por     mm0, mm6
457
-    movd    mm6, [leftq]
458
-    por     mm2, mm6
459
-    xor     maskq, maskq
460
-.loop:
461
-    movq    mm1, [src1q + maskq]
462
-    movq    mm3, [src2q + maskq]
463
-    movq    mm4, mm2
464
-    psubw   mm2, mm0
465
-    paddw   mm2, mm1
466
-    pand    mm2, mm7
467
-    movq    mm5, mm4
468
-    pmaxsw  mm4, mm1
469
-    pminsw  mm1, mm5
470
-    pminsw  mm4, mm2
471
-    pmaxsw  mm4, mm1
472
-    psubw   mm3, mm4
473
-    pand    mm3, mm7
474
-    movq    [dstq + maskq], mm3
475
-    add     maskq, 8
476
-    movq    mm0, [src1q + maskq - 2]
477
-    movq    mm2, [src2q + maskq - 2]
478
-    cmp     maskq, wq
479
-        jb .loop
480
-    movzx maskd, word [src1q + wq - 2]
481
-    mov [left_topq], maskd
482
-    movzx maskd, word [src2q + wq - 2]
483
-    mov [leftq], maskd
484
-    RET
... ...
@@ -41,12 +41,9 @@ int  ff_add_left_pred_sse4(uint8_t *dst, const uint8_t *src,
41 41
 
42 42
 void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
43 43
 void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
44
-void ff_diff_int16_mmx (uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w);
45
-void ff_diff_int16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w);
46 44
 int ff_add_hfyu_left_pred_int16_ssse3(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned acc);
47 45
 int ff_add_hfyu_left_pred_int16_sse4(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned acc);
48 46
 void ff_add_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *top, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top);
49
-void ff_sub_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w, int *left, int *left_top);
50 47
 
51 48
 #if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32
52 49
 static void add_median_pred_cmov(uint8_t *dst, const uint8_t *top,
... ...
@@ -98,9 +95,7 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx)
98 98
 
99 99
     if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) {
100 100
         c->add_bytes = ff_add_bytes_mmx;
101
-
102 101
         c->add_int16 = ff_add_int16_mmx;
103
-        c->diff_int16 = ff_diff_int16_mmx;
104 102
     }
105 103
 
106 104
     if (ARCH_X86_32 && EXTERNAL_MMXEXT(cpu_flags)) {
... ...
@@ -111,7 +106,6 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx)
111 111
 
112 112
     if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc && pix_desc->comp[0].depth<16) {
113 113
         c->add_hfyu_median_pred_int16 = ff_add_hfyu_median_pred_int16_mmxext;
114
-        c->sub_hfyu_median_pred_int16 = ff_sub_hfyu_median_pred_int16_mmxext;
115 114
     }
116 115
 
117 116
     if (EXTERNAL_SSE2(cpu_flags)) {
... ...
@@ -119,7 +113,6 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx)
119 119
         c->add_median_pred = ff_add_median_pred_sse2;
120 120
 
121 121
         c->add_int16 = ff_add_int16_sse2;
122
-        c->diff_int16 = ff_diff_int16_sse2;
123 122
     }
124 123
 
125 124
     if (EXTERNAL_SSSE3(cpu_flags)) {