Browse code

Merge remote-tracking branch 'qatar/master'

* qatar/master:
v410dec: Implement explode mode support
zerocodec: fix direct rendering.
wav: init st to NULL to avoid a false-positive warning.
wavpack: set bits_per_raw_sample for S32 samples to properly identify 24-bit
h264: refactor NAL decode loop
RTMPTE protocol support
RTMPE protocol support
rtmp: Add ff_rtmp_calc_digest_pos()
rtmp: Rename rtmp_calc_digest to ff_rtmp_calc_digest and make it global
swscale: add missing HAVE_INLINE_ASM check.
lavfi: place x86 inline assembly under HAVE_INLINE_ASM.
vc1: Add a test for interlaced field pictures
swscale: Mark all init functions as av_cold
swscale: x86: Drop pointless _mmx suffix from filenames
lavf: use conditional notation for default codec in muxer declarations.
swscale: place inline assembly bilinear scaler under HAVE_INLINE_ASM.
dsputil: ppc: cosmetics: pretty-print
dsputil: x86: add SHUFFLE_MASK_W macro
configure: respect CC_O setting in check_cc

Conflicts:
Changelog
configure
libavcodec/v410dec.c
libavcodec/zerocodec.c
libavformat/asfenc.c
libavformat/version.h
libswscale/utils.c
libswscale/x86/swscale.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>

Michael Niedermayer authored on 2012/07/24 04:04:06
Showing 41 changed files
... ...
@@ -21,21 +21,8 @@ version next:
21 21
 - Microsoft Expression Encoder Screen decoder
22 22
 - RTMPS protocol support
23 23
 - RTMPTS protocol support
24
-- showwaves filter
25
-- LucasArts SMUSH playback support
26
-- SAMI demuxer and decoder
27
-- RealText demuxer and decoder
28
-- Heart Of Darkness PAF playback support
29
-- iec61883 device
30
-- asettb filter
31
-- new option: -progress
32
-- 3GPP Timed Text decoder
33
-- GeoTIFF decoder support
34
-- ffmpeg -(no)stdin option
35
-- Opus decoder using libopus
36
-- caca output device using libcaca
37
-- alphaextract and alphamerge filters
38
-- concat filter
24
+- RTMPE protocol support
25
+- RTMPTE protocol support
39 26
 
40 27
 
41 28
 version 0.11:
... ...
@@ -667,11 +667,15 @@ check_cmd(){
667 667
     "$@" >> $logfile 2>&1
668 668
 }
669 669
 
670
+cc_o(){
671
+    eval printf '%s\\n' $CC_O
672
+}
673
+
670 674
 check_cc(){
671 675
     log check_cc "$@"
672 676
     cat > $TMPC
673 677
     log_file $TMPC
674
-    check_cmd $cc $CPPFLAGS $CFLAGS "$@" -c -o $TMPO $TMPC
678
+    check_cmd $cc $CPPFLAGS $CFLAGS "$@" -c $(cc_o $TMPO) $TMPC
675 679
 }
676 680
 
677 681
 check_cxx(){
... ...
@@ -1055,6 +1059,7 @@ CONFIG_LIST="
1055 1055
     fft
1056 1056
     fontconfig
1057 1057
     frei0r
1058
+    gcrypt
1058 1059
     gnutls
1059 1060
     gpl
1060 1061
     gray
... ...
@@ -1100,6 +1105,7 @@ CONFIG_LIST="
1100 1100
     memalign_hack
1101 1101
     memory_poisoning
1102 1102
     mpegaudiodsp
1103
+    nettle
1103 1104
     network
1104 1105
     nonfree
1105 1106
     openal
... ...
@@ -1731,6 +1737,9 @@ x11_grab_device_indev_deps="x11grab"
1731 1731
 
1732 1732
 # protocols
1733 1733
 bluray_protocol_deps="libbluray"
1734
+ffrtmpcrypt_protocol_deps="!librtmp_protocol"
1735
+ffrtmpcrypt_protocol_deps_any="gcrypt nettle openssl"
1736
+ffrtmpcrypt_protocol_select="tcp_protocol"
1734 1737
 ffrtmphttp_protocol_deps="!librtmp_protocol"
1735 1738
 ffrtmphttp_protocol_select="http_protocol"
1736 1739
 gopher_protocol_deps="network"
... ...
@@ -1748,9 +1757,11 @@ mmsh_protocol_select="http_protocol"
1748 1748
 mmst_protocol_deps="network"
1749 1749
 rtmp_protocol_deps="!librtmp_protocol"
1750 1750
 rtmp_protocol_select="tcp_protocol"
1751
+rtmpe_protocol_select="ffrtmpcrypt_protocol"
1751 1752
 rtmps_protocol_deps="!librtmp_protocol"
1752 1753
 rtmps_protocol_select="tls_protocol"
1753 1754
 rtmpt_protocol_select="ffrtmphttp_protocol"
1755
+rtmpte_protocol_select="ffrtmpcrypt_protocol ffrtmphttp_protocol"
1754 1756
 rtmpts_protocol_select="ffrtmphttp_protocol"
1755 1757
 rtp_protocol_select="udp_protocol"
1756 1758
 sctp_protocol_deps="network netinet_sctp_h"
... ...
@@ -2371,12 +2382,10 @@ elif $cc --vsn 2>/dev/null | grep -q "ARM C/C++ Compiler"; then
2371 2371
 elif $cc -version 2>/dev/null | grep -q TMS470; then
2372 2372
     cc_type=tms470
2373 2373
     cc_ident=$($cc -version | head -n1 | tr -s ' ')
2374
-    cc="$cc --gcc --abi=eabi -eo=.o -mc -me"
2375
-    CC_O='-fr=$(@D)'
2374
+    cc="$cc --gcc --abi=eabi -me"
2375
+    CC_O='-fe=$@'
2376 2376
     as_default="${cross_prefix}gcc"
2377 2377
     ld_default="${cross_prefix}gcc"
2378
-    TMPO=$(basename $TMPC .c).o
2379
-    append TMPFILES $TMPO
2380 2378
     add_cflags -D__gnuc_va_list=va_list -D__USER_LABEL_PREFIX__=
2381 2379
     CC_DEPFLAGS='-ppa -ppd=$(@:.o=.d)'
2382 2380
     AS_DEPFLAGS='-MMD'
... ...
@@ -3375,6 +3384,11 @@ enabled openssl    && { check_lib openssl/ssl.h SSL_library_init -lssl -lcrypto
3375 3375
                         check_lib openssl/ssl.h SSL_library_init -lssl -lcrypto -lws2_32 -lgdi32 ||
3376 3376
                         die "ERROR: openssl not found"; }
3377 3377
 
3378
+if enabled gnutls; then
3379
+    { check_lib nettle/bignum.h nettle_mpz_get_str_256 -lnettle -lhogweed -lgmp && enable nettle; } ||
3380
+    { check_lib gcrypt.h gcry_mpi_new -lgcrypt && enable gcrypt; }
3381
+fi
3382
+
3378 3383
 # libdc1394 check
3379 3384
 if enabled libdc1394; then
3380 3385
     { check_lib dc1394/dc1394.h dc1394_new -ldc1394 -lraw1394 &&
... ...
@@ -904,10 +904,10 @@ performance on systems without hardware floating point support).
904 904
 @item MMST         @tab X
905 905
 @item pipe         @tab X
906 906
 @item RTMP         @tab X
907
-@item RTMPE        @tab E
907
+@item RTMPE        @tab X
908 908
 @item RTMPS        @tab X
909 909
 @item RTMPT        @tab X
910
-@item RTMPTE       @tab E
910
+@item RTMPTE       @tab X
911 911
 @item RTMPTS       @tab X
912 912
 @item RTP          @tab X
913 913
 @item SCTP         @tab X
... ...
@@ -277,6 +277,15 @@ For example to read with @command{ffplay} a multimedia resource named
277 277
 ffplay rtmp://myserver/vod/sample
278 278
 @end example
279 279
 
280
+@section rtmpe
281
+
282
+Encrypted Real-Time Messaging Protocol.
283
+
284
+The Encrypted Real-Time Messaging Protocol (RTMPE) is used for
285
+streaming multimedia content using standard cryptographic primitives,
286
+consisting of Diffie-Hellman key exchange and HMACSHA256, generating
287
+a pair of RC4 keys.
288
+
280 289
 @section rtmps
281 290
 
282 291
 Real-Time Messaging Protocol over a secure SSL connection.
... ...
@@ -292,6 +301,14 @@ The Real-Time Messaging Protocol tunneled through HTTP (RTMPT) is used
292 292
 for streaming multimedia content within HTTP requests to traverse
293 293
 firewalls.
294 294
 
295
+@section rtmpte
296
+
297
+Encrypted Real-Time Messaging Protocol tunneled through HTTP.
298
+
299
+The Encrypted Real-Time Messaging Protocol tunneled through HTTP (RTMPTE)
300
+is used for streaming multimedia content within HTTP requests to traverse
301
+firewalls.
302
+
295 303
 @section rtmpts
296 304
 
297 305
 Real-Time Messaging Protocol tunneled through HTTPS.
... ...
@@ -188,42 +188,50 @@ const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src,
188 188
     src++;
189 189
     length--;
190 190
 
191
+#define STARTCODE_TEST                                                  \
192
+        if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
193
+            if (src[i + 2] != 3) {                                      \
194
+                /* startcode, so we must be past the end */             \
195
+                length = i;                                             \
196
+            }                                                           \
197
+            break;                                                      \
198
+        }
191 199
 #if HAVE_FAST_UNALIGNED
200
+#define FIND_FIRST_ZERO                                                 \
201
+        if (i > 0 && !src[i])                                           \
202
+            i--;                                                        \
203
+        while (src[i])                                                  \
204
+            i++
192 205
 #if HAVE_FAST_64BIT
193
-#define RS 7
194 206
     for (i = 0; i + 1 < length; i += 9) {
195 207
         if (!((~AV_RN64A(src + i) &
196 208
                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
197 209
               0x8000800080008080ULL))
210
+            continue;
211
+        FIND_FIRST_ZERO;
212
+        STARTCODE_TEST;
213
+        i -= 7;
214
+    }
198 215
 #else
199
-#define RS 3
200 216
     for (i = 0; i + 1 < length; i += 5) {
201 217
         if (!((~AV_RN32A(src + i) &
202 218
                (AV_RN32A(src + i) - 0x01000101U)) &
203 219
               0x80008080U))
204
-#endif
205 220
             continue;
206
-        if (i > 0 && !src[i])
207
-            i--;
208
-        while (src[i])
209
-            i++;
221
+        FIND_FIRST_ZERO;
222
+        STARTCODE_TEST;
223
+        i -= 3;
224
+    }
225
+#endif
210 226
 #else
211
-#define RS 0
212 227
     for (i = 0; i + 1 < length; i += 2) {
213 228
         if (src[i])
214 229
             continue;
215 230
         if (i > 0 && src[i - 1] == 0)
216 231
             i--;
217
-#endif
218
-        if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {
219
-            if (src[i + 2] != 3) {
220
-                /* startcode, so we must be past the end */
221
-                length = i;
222
-            }
223
-            break;
224
-        }
225
-        i -= RS;
232
+        STARTCODE_TEST;
226 233
     }
234
+#endif
227 235
 
228 236
     // use second escape buffer for inter data
229 237
     bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0;
... ...
@@ -23,7 +23,8 @@
23 23
 #include "libavutil/ppc/util_altivec.h"
24 24
 #include "dsputil_altivec.h"
25 25
 
26
-static void int32_to_float_fmul_scalar_altivec(float *dst, const int *src, float mul, int len)
26
+static void int32_to_float_fmul_scalar_altivec(float *dst, const int *src,
27
+                                               float mul, int len)
27 28
 {
28 29
     union {
29 30
         vector float v;
... ...
@@ -36,7 +37,7 @@ static void int32_to_float_fmul_scalar_altivec(float *dst, const int *src, float
36 36
     mul_u.s[0] = mul;
37 37
     mul_v = vec_splat(mul_u.v, 0);
38 38
 
39
-    for(i=0; i<len; i+=8) {
39
+    for (i = 0; i < len; i += 8) {
40 40
         src1 = vec_ctf(vec_ld(0,  src+i), 0);
41 41
         src2 = vec_ctf(vec_ld(16, src+i), 0);
42 42
         dst1 = vec_madd(src1, mul_v, zero);
... ...
@@ -47,8 +48,7 @@ static void int32_to_float_fmul_scalar_altivec(float *dst, const int *src, float
47 47
 }
48 48
 
49 49
 
50
-static vector signed short
51
-float_to_int16_one_altivec(const float *src)
50
+static vector signed short float_to_int16_one_altivec(const float *src)
52 51
 {
53 52
     vector float s0 = vec_ld(0, src);
54 53
     vector float s1 = vec_ld(16, src);
... ...
@@ -62,80 +62,82 @@ static void float_to_int16_altivec(int16_t *dst, const float *src, long len)
62 62
     int i;
63 63
     vector signed short d0, d1, d;
64 64
     vector unsigned char align;
65
-    if(((long)dst)&15) //FIXME
66
-    for(i=0; i<len-7; i+=8) {
67
-        d0 = vec_ld(0, dst+i);
68
-        d = float_to_int16_one_altivec(src+i);
69
-        d1 = vec_ld(15, dst+i);
70
-        d1 = vec_perm(d1, d0, vec_lvsl(0,dst+i));
71
-        align = vec_lvsr(0, dst+i);
72
-        d0 = vec_perm(d1, d, align);
73
-        d1 = vec_perm(d, d1, align);
74
-        vec_st(d0, 0, dst+i);
75
-        vec_st(d1,15, dst+i);
76
-    }
77
-    else
78
-    for(i=0; i<len-7; i+=8) {
79
-        d = float_to_int16_one_altivec(src+i);
80
-        vec_st(d, 0, dst+i);
65
+    if (((long)dst) & 15) { //FIXME
66
+        for (i = 0; i < len - 7; i += 8) {
67
+            d0 = vec_ld(0, dst+i);
68
+            d  = float_to_int16_one_altivec(src + i);
69
+            d1 = vec_ld(15, dst+i);
70
+            d1 = vec_perm(d1, d0, vec_lvsl(0, dst + i));
71
+            align = vec_lvsr(0, dst + i);
72
+            d0 = vec_perm(d1, d, align);
73
+            d1 = vec_perm(d, d1, align);
74
+            vec_st(d0,  0, dst + i);
75
+            vec_st(d1, 15, dst + i);
76
+        }
77
+    } else {
78
+        for (i = 0; i < len - 7; i += 8) {
79
+            d = float_to_int16_one_altivec(src + i);
80
+            vec_st(d, 0, dst + i);
81
+        }
81 82
     }
82 83
 }
83 84
 
84
-static void
85
-float_to_int16_interleave_altivec(int16_t *dst, const float **src,
86
-                                  long len, int channels)
85
+static void float_to_int16_interleave_altivec(int16_t *dst, const float **src,
86
+                                              long len, int channels)
87 87
 {
88 88
     int i;
89 89
     vector signed short d0, d1, d2, c0, c1, t0, t1;
90 90
     vector unsigned char align;
91
-    if(channels == 1)
91
+
92
+    if (channels == 1)
92 93
         float_to_int16_altivec(dst, src[0], len);
93
-    else
94
+    else {
94 95
         if (channels == 2) {
95
-        if(((long)dst)&15)
96
-        for(i=0; i<len-7; i+=8) {
97
-            d0 = vec_ld(0, dst + i);
98
-            t0 = float_to_int16_one_altivec(src[0] + i);
99
-            d1 = vec_ld(31, dst + i);
100
-            t1 = float_to_int16_one_altivec(src[1] + i);
101
-            c0 = vec_mergeh(t0, t1);
102
-            c1 = vec_mergel(t0, t1);
103
-            d2 = vec_perm(d1, d0, vec_lvsl(0, dst + i));
104
-            align = vec_lvsr(0, dst + i);
105
-            d0 = vec_perm(d2, c0, align);
106
-            d1 = vec_perm(c0, c1, align);
107
-            vec_st(d0,  0, dst + i);
108
-            d0 = vec_perm(c1, d2, align);
109
-            vec_st(d1, 15, dst + i);
110
-            vec_st(d0, 31, dst + i);
111
-            dst+=8;
112
-        }
113
-        else
114
-        for(i=0; i<len-7; i+=8) {
115
-            t0 = float_to_int16_one_altivec(src[0] + i);
116
-            t1 = float_to_int16_one_altivec(src[1] + i);
117
-            d0 = vec_mergeh(t0, t1);
118
-            d1 = vec_mergel(t0, t1);
119
-            vec_st(d0,  0, dst + i);
120
-            vec_st(d1, 16, dst + i);
121
-            dst+=8;
122
-        }
123
-    } else {
124
-        DECLARE_ALIGNED(16, int16_t, tmp)[len];
125
-        int c, j;
126
-        for (c = 0; c < channels; c++) {
127
-            float_to_int16_altivec(tmp, src[c], len);
128
-            for (i = 0, j = c; i < len; i++, j+=channels) {
129
-                dst[j] = tmp[i];
96
+            if (((long)dst) & 15) {
97
+                for (i = 0; i < len - 7; i += 8) {
98
+                    d0 = vec_ld(0,  dst + i);
99
+                    t0 = float_to_int16_one_altivec(src[0] + i);
100
+                    d1 = vec_ld(31, dst + i);
101
+                    t1 = float_to_int16_one_altivec(src[1] + i);
102
+                    c0 = vec_mergeh(t0, t1);
103
+                    c1 = vec_mergel(t0, t1);
104
+                    d2 = vec_perm(d1, d0, vec_lvsl(0, dst + i));
105
+                    align = vec_lvsr(0, dst + i);
106
+                    d0 = vec_perm(d2, c0, align);
107
+                    d1 = vec_perm(c0, c1, align);
108
+                    vec_st(d0,  0, dst + i);
109
+                    d0 = vec_perm(c1, d2, align);
110
+                    vec_st(d1, 15, dst + i);
111
+                    vec_st(d0, 31, dst + i);
112
+                    dst += 8;
113
+                }
114
+            } else {
115
+                for (i = 0; i < len - 7; i += 8) {
116
+                    t0 = float_to_int16_one_altivec(src[0] + i);
117
+                    t1 = float_to_int16_one_altivec(src[1] + i);
118
+                    d0 = vec_mergeh(t0, t1);
119
+                    d1 = vec_mergel(t0, t1);
120
+                    vec_st(d0,  0, dst + i);
121
+                    vec_st(d1, 16, dst + i);
122
+                    dst += 8;
123
+                }
124
+            }
125
+        } else {
126
+            DECLARE_ALIGNED(16, int16_t, tmp)[len];
127
+            int c, j;
128
+            for (c = 0; c < channels; c++) {
129
+                float_to_int16_altivec(tmp, src[c], len);
130
+                for (i = 0, j = c; i < len; i++, j+=channels)
131
+                    dst[j] = tmp[i];
130 132
             }
131 133
         }
132
-   }
134
+    }
133 135
 }
134 136
 
135 137
 void ff_fmt_convert_init_altivec(FmtConvertContext *c, AVCodecContext *avctx)
136 138
 {
137 139
     c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_altivec;
138
-    if(!(avctx->flags & CODEC_FLAG_BITEXACT)) {
140
+    if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
139 141
         c->float_to_int16 = float_to_int16_altivec;
140 142
         c->float_to_int16_interleave = float_to_int16_interleave_altivec;
141 143
     }
... ...
@@ -29,7 +29,12 @@ static av_cold int v410_decode_init(AVCodecContext *avctx)
29 29
     avctx->bits_per_raw_sample = 10;
30 30
 
31 31
     if (avctx->width & 1) {
32
-        av_log(avctx, AV_LOG_WARNING, "v410 requires width to be even.\n");
32
+        if (avctx->err_recognition & AV_EF_EXPLODE) {
33
+            av_log(avctx, AV_LOG_ERROR, "v410 requires width to be even.\n");
34
+            return AVERROR_INVALIDDATA;
35
+        } else {
36
+            av_log(avctx, AV_LOG_WARNING, "v410 requires width to be even, continuing anyway.\n");
37
+        }
33 38
     }
34 39
 
35 40
     avctx->coded_frame = avcodec_alloc_frame();
... ...
@@ -28,7 +28,7 @@ pb_zzzzzzzz77777777: times 8 db -1
28 28
 pb_7: times 8 db 7
29 29
 pb_zzzz3333zzzzbbbb: db -1,-1,-1,-1,3,3,3,3,-1,-1,-1,-1,11,11,11,11
30 30
 pb_zz11zz55zz99zzdd: db -1,-1,1,1,-1,-1,5,5,-1,-1,9,9,-1,-1,13,13
31
-pb_revwords: db 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1
31
+pb_revwords: SHUFFLE_MASK_W 7, 6, 5, 4, 3, 2, 1, 0
32 32
 pd_16384: times 4 dd 16384
33 33
 pb_bswap32: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
34 34
 
... ...
@@ -94,7 +94,7 @@ static int zerocodec_decode_frame(AVCodecContext *avctx, void *data,
94 94
     if (prev_pic->data[0])
95 95
         avctx->release_buffer(avctx, prev_pic);
96 96
 
97
-    *data_size = sizeof(AVFrame);
97
+    *data_size       = sizeof(AVFrame);
98 98
     *(AVFrame *)data = *pic;
99 99
 
100 100
     /* Store the previous frame for use later.
... ...
@@ -23,6 +23,8 @@
23 23
 #include "libavutil/x86_cpu.h"
24 24
 #include "libavfilter/gradfun.h"
25 25
 
26
+#if HAVE_INLINE_ASM
27
+
26 28
 DECLARE_ALIGNED(16, static const uint16_t, pw_7f)[8] = {0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F};
27 29
 DECLARE_ALIGNED(16, static const uint16_t, pw_ff)[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};
28 30
 
... ...
@@ -164,10 +166,13 @@ static void gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, const uint16_t *
164 164
 }
165 165
 #endif // HAVE_SSE
166 166
 
167
+#endif /* HAVE_INLINE_ASM */
168
+
167 169
 av_cold void ff_gradfun_init_x86(GradFunContext *gf)
168 170
 {
169 171
     int cpu_flags = av_get_cpu_flags();
170 172
 
173
+#if HAVE_INLINE_ASM
171 174
 #if HAVE_MMX2
172 175
     if (cpu_flags & AV_CPU_FLAG_MMX2)
173 176
         gf->filter_line = gradfun_filter_line_mmx2;
... ...
@@ -180,4 +185,5 @@ av_cold void ff_gradfun_init_x86(GradFunContext *gf)
180 180
     if (cpu_flags & AV_CPU_FLAG_SSE2)
181 181
         gf->blur_line = gradfun_blur_line_sse2;
182 182
 #endif
183
+#endif /* HAVE_INLINE_ASM */
183 184
 }
... ...
@@ -24,6 +24,8 @@
24 24
 #include "libavcodec/x86/dsputil_mmx.h"
25 25
 #include "libavfilter/yadif.h"
26 26
 
27
+#if HAVE_INLINE_ASM
28
+
27 29
 DECLARE_ASM_CONST(16, const xmm_reg, pb_1) = {0x0101010101010101ULL, 0x0101010101010101ULL};
28 30
 DECLARE_ASM_CONST(16, const xmm_reg, pw_1) = {0x0001000100010001ULL, 0x0001000100010001ULL};
29 31
 
... ...
@@ -49,10 +51,13 @@ DECLARE_ASM_CONST(16, const xmm_reg, pw_1) = {0x0001000100010001ULL, 0x000100010
49 49
 #include "yadif_template.c"
50 50
 #endif
51 51
 
52
+#endif /* HAVE_INLINE_ASM */
53
+
52 54
 av_cold void ff_yadif_init_x86(YADIFContext *yadif)
53 55
 {
54 56
     int cpu_flags = av_get_cpu_flags();
55 57
 
58
+#if HAVE_INLINE_ASM
56 59
 #if HAVE_MMX
57 60
     if (cpu_flags & AV_CPU_FLAG_MMX)
58 61
         yadif->filter_line = yadif_filter_line_mmx;
... ...
@@ -65,4 +70,5 @@ av_cold void ff_yadif_init_x86(YADIFContext *yadif)
65 65
     if (cpu_flags & AV_CPU_FLAG_SSSE3)
66 66
         yadif->filter_line = yadif_filter_line_ssse3;
67 67
 #endif
68
+#endif /* HAVE_INLINE_ASM */
68 69
 }
... ...
@@ -374,6 +374,7 @@ OBJS-$(CONFIG_BLURAY_PROTOCOL)           += bluray.o
374 374
 OBJS-$(CONFIG_CACHE_PROTOCOL)            += cache.o
375 375
 OBJS-$(CONFIG_CONCAT_PROTOCOL)           += concat.o
376 376
 OBJS-$(CONFIG_CRYPTO_PROTOCOL)           += crypto.o
377
+OBJS-$(CONFIG_FFRTMPCRYPT_PROTOCOL)      += rtmpcrypt.o rtmpdh.o
377 378
 OBJS-$(CONFIG_FFRTMPHTTP_PROTOCOL)       += rtmphttp.o
378 379
 OBJS-$(CONFIG_FILE_PROTOCOL)             += file.o
379 380
 OBJS-$(CONFIG_GOPHER_PROTOCOL)           += gopher.o
... ...
@@ -386,8 +387,10 @@ OBJS-$(CONFIG_MMST_PROTOCOL)             += mmst.o mms.o asf.o
386 386
 OBJS-$(CONFIG_MD5_PROTOCOL)              += md5proto.o
387 387
 OBJS-$(CONFIG_PIPE_PROTOCOL)             += file.o
388 388
 OBJS-$(CONFIG_RTMP_PROTOCOL)             += rtmpproto.o rtmppkt.o
389
+OBJS-$(CONFIG_RTMPE_PROTOCOL)            += rtmpproto.o rtmppkt.o
389 390
 OBJS-$(CONFIG_RTMPS_PROTOCOL)            += rtmpproto.o rtmppkt.o
390 391
 OBJS-$(CONFIG_RTMPT_PROTOCOL)            += rtmpproto.o rtmppkt.o
392
+OBJS-$(CONFIG_RTMPTE_PROTOCOL)           += rtmpproto.o rtmppkt.o
391 393
 OBJS-$(CONFIG_RTMPTS_PROTOCOL)           += rtmpproto.o rtmppkt.o
392 394
 OBJS-$(CONFIG_RTP_PROTOCOL)              += rtpproto.o
393 395
 OBJS-$(CONFIG_SCTP_PROTOCOL)             += sctp.o
... ...
@@ -272,6 +272,7 @@ void av_register_all(void)
272 272
     REGISTER_PROTOCOL (CACHE, cache);
273 273
     REGISTER_PROTOCOL (CONCAT, concat);
274 274
     REGISTER_PROTOCOL (CRYPTO, crypto);
275
+    REGISTER_PROTOCOL (FFRTMPCRYPT, ffrtmpcrypt);
275 276
     REGISTER_PROTOCOL (FFRTMPHTTP, ffrtmphttp);
276 277
     REGISTER_PROTOCOL (FILE, file);
277 278
     REGISTER_PROTOCOL (GOPHER, gopher);
... ...
@@ -284,8 +285,10 @@ void av_register_all(void)
284 284
     REGISTER_PROTOCOL (MD5,  md5);
285 285
     REGISTER_PROTOCOL (PIPE, pipe);
286 286
     REGISTER_PROTOCOL (RTMP, rtmp);
287
+    REGISTER_PROTOCOL (RTMPE, rtmpe);
287 288
     REGISTER_PROTOCOL (RTMPS, rtmps);
288 289
     REGISTER_PROTOCOL (RTMPT, rtmpt);
290
+    REGISTER_PROTOCOL (RTMPTE, rtmpte);
289 291
     REGISTER_PROTOCOL (RTMPTS, rtmpts);
290 292
     REGISTER_PROTOCOL (RTP, rtp);
291 293
     REGISTER_PROTOCOL (SCTP, sctp);
... ...
@@ -654,11 +654,7 @@ AVOutputFormat ff_avi_muxer = {
654 654
     .mime_type         = "video/x-msvideo",
655 655
     .extensions        = "avi",
656 656
     .priv_data_size    = sizeof(AVIContext),
657
-#if CONFIG_LIBMP3LAME_ENCODER
658
-    .audio_codec       = CODEC_ID_MP3,
659
-#else
660
-    .audio_codec       = CODEC_ID_AC3,
661
-#endif
657
+    .audio_codec       = CONFIG_LIBMP3LAME ? CODEC_ID_MP3 : CODEC_ID_AC3,
662 658
     .video_codec       = CODEC_ID_MPEG4,
663 659
     .write_header      = avi_write_header,
664 660
     .write_packet      = avi_write_packet,
... ...
@@ -570,11 +570,7 @@ AVOutputFormat ff_flv_muxer = {
570 570
     .mime_type      = "video/x-flv",
571 571
     .extensions     = "flv",
572 572
     .priv_data_size = sizeof(FLVContext),
573
-#if CONFIG_LIBMP3LAME
574
-    .audio_codec    = CODEC_ID_MP3,
575
-#else // CONFIG_LIBMP3LAME
576
-    .audio_codec    = CODEC_ID_ADPCM_SWF,
577
-#endif // CONFIG_LIBMP3LAME
573
+    .audio_codec    = CONFIG_LIBMP3LAME ? CODEC_ID_MP3 : CODEC_ID_ADPCM_SWF,
578 574
     .video_codec    = CODEC_ID_FLV1,
579 575
     .write_header   = flv_write_header,
580 576
     .write_packet   = flv_write_packet,
... ...
@@ -1311,16 +1311,10 @@ AVOutputFormat ff_matroska_muxer = {
1311 1311
     .mime_type         = "video/x-matroska",
1312 1312
     .extensions        = "mkv",
1313 1313
     .priv_data_size    = sizeof(MatroskaMuxContext),
1314
-#if CONFIG_LIBVORBIS_ENCODER
1315
-    .audio_codec       = CODEC_ID_VORBIS,
1316
-#else
1317
-    .audio_codec       = CODEC_ID_AC3,
1318
-#endif
1319
-#if CONFIG_LIBX264_ENCODER
1320
-    .video_codec       = CODEC_ID_H264,
1321
-#else
1322
-    .video_codec       = CODEC_ID_MPEG4,
1323
-#endif
1314
+    .audio_codec       = CONFIG_LIBVORBIS_ENCODER ?
1315
+                         CODEC_ID_VORBIS : CODEC_ID_AC3,
1316
+    .video_codec       = CONFIG_LIBX264_ENCODER ?
1317
+                         CODEC_ID_H264 : CODEC_ID_MPEG4,
1324 1318
     .write_header      = mkv_write_header,
1325 1319
     .write_packet      = mkv_write_packet,
1326 1320
     .write_trailer     = mkv_write_trailer,
... ...
@@ -1355,11 +1349,8 @@ AVOutputFormat ff_matroska_audio_muxer = {
1355 1355
     .mime_type         = "audio/x-matroska",
1356 1356
     .extensions        = "mka",
1357 1357
     .priv_data_size    = sizeof(MatroskaMuxContext),
1358
-#if CONFIG_LIBVORBIS_ENCODER
1359
-    .audio_codec       = CODEC_ID_VORBIS,
1360
-#else
1361
-    .audio_codec       = CODEC_ID_AC3,
1362
-#endif
1358
+    .audio_codec       = CONFIG_LIBVORBIS_ENCODER ?
1359
+                         CODEC_ID_VORBIS : CODEC_ID_AC3,
1363 1360
     .video_codec       = CODEC_ID_NONE,
1364 1361
     .write_header      = mkv_write_header,
1365 1362
     .write_packet      = mkv_write_packet,
... ...
@@ -3584,11 +3584,8 @@ AVOutputFormat ff_mov_muxer = {
3584 3584
     .extensions        = "mov",
3585 3585
     .priv_data_size    = sizeof(MOVMuxContext),
3586 3586
     .audio_codec       = CODEC_ID_AAC,
3587
-#if CONFIG_LIBX264_ENCODER
3588
-    .video_codec       = CODEC_ID_H264,
3589
-#else
3590
-    .video_codec       = CODEC_ID_MPEG4,
3591
-#endif
3587
+    .video_codec       = CONFIG_LIBX264_ENCODER ?
3588
+                         CODEC_ID_H264 : CODEC_ID_MPEG4,
3592 3589
     .write_header      = mov_write_header,
3593 3590
     .write_packet      = mov_write_packet,
3594 3591
     .write_trailer     = mov_write_trailer,
... ...
@@ -3625,11 +3622,8 @@ AVOutputFormat ff_mp4_muxer = {
3625 3625
     .extensions        = "mp4",
3626 3626
     .priv_data_size    = sizeof(MOVMuxContext),
3627 3627
     .audio_codec       = CODEC_ID_AAC,
3628
-#if CONFIG_LIBX264_ENCODER
3629
-    .video_codec       = CODEC_ID_H264,
3630
-#else
3631
-    .video_codec       = CODEC_ID_MPEG4,
3632
-#endif
3628
+    .video_codec       = CONFIG_LIBX264_ENCODER ?
3629
+                         CODEC_ID_H264 : CODEC_ID_MPEG4,
3633 3630
     .write_header      = mov_write_header,
3634 3631
     .write_packet      = mov_write_packet,
3635 3632
     .write_trailer     = mov_write_trailer,
... ...
@@ -3646,11 +3640,8 @@ AVOutputFormat ff_psp_muxer = {
3646 3646
     .extensions        = "mp4,psp",
3647 3647
     .priv_data_size    = sizeof(MOVMuxContext),
3648 3648
     .audio_codec       = CODEC_ID_AAC,
3649
-#if CONFIG_LIBX264_ENCODER
3650
-    .video_codec       = CODEC_ID_H264,
3651
-#else
3652
-    .video_codec       = CODEC_ID_MPEG4,
3653
-#endif
3649
+    .video_codec       = CONFIG_LIBX264_ENCODER ?
3650
+                         CODEC_ID_H264 : CODEC_ID_MPEG4,
3654 3651
     .write_header      = mov_write_header,
3655 3652
     .write_packet      = mov_write_packet,
3656 3653
     .write_trailer     = mov_write_trailer,
... ...
@@ -870,13 +870,8 @@ AVOutputFormat ff_nut_muxer = {
870 870
     .mime_type      = "video/x-nut",
871 871
     .extensions     = "nut",
872 872
     .priv_data_size = sizeof(NUTContext),
873
-#if   CONFIG_LIBVORBIS
874
-    .audio_codec    = CODEC_ID_VORBIS,
875
-#elif CONFIG_LIBMP3LAME
876
-    .audio_codec    = CODEC_ID_MP3,
877
-#else
878
-    .audio_codec    = CODEC_ID_MP2,
879
-#endif
873
+    .audio_codec    = CONFIG_LIBVORBIS ? CODEC_ID_VORBIS :
874
+                      CONFIG_LIBMP3LAME ? CODEC_ID_MP3 : CODEC_ID_MP2,
880 875
     .video_codec    = CODEC_ID_MPEG4,
881 876
     .write_header   = nut_write_header,
882 877
     .write_packet   = nut_write_packet,
... ...
@@ -29,6 +29,9 @@
29 29
 
30 30
 #define RTMP_HANDSHAKE_PACKET_SIZE 1536
31 31
 
32
+#define HMAC_IPAD_VAL 0x36
33
+#define HMAC_OPAD_VAL 0x5C
34
+
32 35
 /**
33 36
  * emulated Flash client version - 9.0.124.2 on Linux
34 37
  * @{
... ...
@@ -40,4 +43,29 @@
40 40
 #define RTMP_CLIENT_VER4    2
41 41
 /** @} */ //version defines
42 42
 
43
+/**
44
+ * Calculate HMAC-SHA2 digest for RTMP handshake packets.
45
+ *
46
+ * @param src    input buffer
47
+ * @param len    input buffer length (should be 1536)
48
+ * @param gap    offset in buffer where 32 bytes should not be taken into account
49
+ *               when calculating digest (since it will be used to store that digest)
50
+ * @param key    digest key
51
+ * @param keylen digest key length
52
+ * @param dst    buffer where calculated digest will be stored (32 bytes)
53
+ */
54
+int ff_rtmp_calc_digest(const uint8_t *src, int len, int gap,
55
+                        const uint8_t *key, int keylen, uint8_t *dst);
56
+
57
+/**
58
+ * Calculate digest position for RTMP handshake packets.
59
+ *
60
+ * @param buf input buffer (should be 1536 bytes)
61
+ * @param off offset in buffer where to start calculating digest position
62
+ * @param mod_val value used for computing modulo
63
+ * @param add_val value added at the end (after computing modulo)
64
+ */
65
+int ff_rtmp_calc_digest_pos(const uint8_t *buf, int off, int mod_val,
66
+                            int add_val);
67
+
43 68
 #endif /* AVFORMAT_RTMP_H */
44 69
new file mode 100644
... ...
@@ -0,0 +1,334 @@
0
+/*
1
+ * RTMPE network protocol
2
+ * Copyright (c) 2012 Samuel Pitoiset
3
+ *
4
+ * This file is part of FFmpeg.
5
+ *
6
+ * FFmpeg is free software; you can redistribute it and/or
7
+ * modify it under the terms of the GNU Lesser General Public
8
+ * License as published by the Free Software Foundation; either
9
+ * version 2.1 of the License, or (at your option) any later version.
10
+ *
11
+ * FFmpeg is distributed in the hope that it will be useful,
12
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
+ * Lesser General Public License for more details.
15
+ *
16
+ * You should have received a copy of the GNU Lesser General Public
17
+ * License along with FFmpeg; if not, write to the Free Software
18
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
+ */
20
+
21
+/**
22
+ * @file
23
+ * RTMPE protocol
24
+ */
25
+
26
+#include "libavutil/blowfish.h"
27
+#include "libavutil/intreadwrite.h"
28
+#include "libavutil/opt.h"
29
+#include "libavutil/rc4.h"
30
+#include "libavutil/xtea.h"
31
+
32
+#include "internal.h"
33
+#include "rtmp.h"
34
+#include "rtmpdh.h"
35
+#include "rtmpcrypt.h"
36
+#include "url.h"
37
+
38
+/* protocol handler context */
39
+typedef struct RTMPEContext {
40
+    const AVClass *class;
41
+    URLContext   *stream;            ///< TCP stream
42
+    FF_DH        *dh;                ///< Diffie-Hellman context
43
+    struct AVRC4 key_in;             ///< RC4 key used for decrypt data
44
+    struct AVRC4 key_out;            ///< RC4 key used for encrypt data
45
+    int          handshaked;         ///< flag indicating when the handshake is performed
46
+    int          tunneling;          ///< use a HTTP connection (RTMPTE)
47
+} RTMPEContext;
48
+
49
+static const uint8_t rtmpe8_keys[16][16] = {
50
+    { 0xbf, 0xf0, 0x34, 0xb2, 0x11, 0xd9, 0x08, 0x1f,
51
+      0xcc, 0xdf, 0xb7, 0x95, 0x74, 0x8d, 0xe7, 0x32 },
52
+    { 0x08, 0x6a, 0x5e, 0xb6, 0x17, 0x43, 0x09, 0x0e,
53
+      0x6e, 0xf0, 0x5a, 0xb8, 0xfe, 0x5a, 0x39, 0xe2 },
54
+    { 0x7b, 0x10, 0x95, 0x6f, 0x76, 0xce, 0x05, 0x21,
55
+      0x23, 0x88, 0xa7, 0x3a, 0x44, 0x01, 0x49, 0xa1 },
56
+    { 0xa9, 0x43, 0xf3, 0x17, 0xeb, 0xf1, 0x1b, 0xb2,
57
+      0xa6, 0x91, 0xa5, 0xee, 0x17, 0xf3, 0x63, 0x39 },
58
+    { 0x7a, 0x30, 0xe0, 0x0a, 0xb5, 0x29, 0xe2, 0x2c,
59
+      0xa0, 0x87, 0xae, 0xa5, 0xc0, 0xcb, 0x79, 0xac },
60
+    { 0xbd, 0xce, 0x0c, 0x23, 0x2f, 0xeb, 0xde, 0xff,
61
+      0x1c, 0xfa, 0xae, 0x16, 0x11, 0x23, 0x23, 0x9d },
62
+    { 0x55, 0xdd, 0x3f, 0x7b, 0x77, 0xe7, 0xe6, 0x2e,
63
+      0x9b, 0xb8, 0xc4, 0x99, 0xc9, 0x48, 0x1e, 0xe4 },
64
+    { 0x40, 0x7b, 0xb6, 0xb4, 0x71, 0xe8, 0x91, 0x36,
65
+      0xa7, 0xae, 0xbf, 0x55, 0xca, 0x33, 0xb8, 0x39 },
66
+    { 0xfc, 0xf6, 0xbd, 0xc3, 0xb6, 0x3c, 0x36, 0x97,
67
+      0x7c, 0xe4, 0xf8, 0x25, 0x04, 0xd9, 0x59, 0xb2 },
68
+    { 0x28, 0xe0, 0x91, 0xfd, 0x41, 0x95, 0x4c, 0x4c,
69
+      0x7f, 0xb7, 0xdb, 0x00, 0xe3, 0xa0, 0x66, 0xf8 },
70
+    { 0x57, 0x84, 0x5b, 0x76, 0x4f, 0x25, 0x1b, 0x03,
71
+      0x46, 0xd4, 0x5b, 0xcd, 0xa2, 0xc3, 0x0d, 0x29 },
72
+    { 0x0a, 0xcc, 0xee, 0xf8, 0xda, 0x55, 0xb5, 0x46,
73
+      0x03, 0x47, 0x34, 0x52, 0x58, 0x63, 0x71, 0x3b },
74
+    { 0xb8, 0x20, 0x75, 0xdc, 0xa7, 0x5f, 0x1f, 0xee,
75
+      0xd8, 0x42, 0x68, 0xe8, 0xa7, 0x2a, 0x44, 0xcc },
76
+    { 0x07, 0xcf, 0x6e, 0x9e, 0xa1, 0x6d, 0x7b, 0x25,
77
+      0x9f, 0xa7, 0xae, 0x6c, 0xd9, 0x2f, 0x56, 0x29 },
78
+    { 0xfe, 0xb1, 0xea, 0xe4, 0x8c, 0x8c, 0x3c, 0xe1,
79
+      0x4e, 0x00, 0x64, 0xa7, 0x6a, 0x38, 0x7c, 0x2a },
80
+    { 0x89, 0x3a, 0x94, 0x27, 0xcc, 0x30, 0x13, 0xa2,
81
+      0xf1, 0x06, 0x38, 0x5b, 0xa8, 0x29, 0xf9, 0x27 }
82
+};
83
+
84
+static const uint8_t rtmpe9_keys[16][24] = {
85
+    { 0x79, 0x34, 0x77, 0x4c, 0x67, 0xd1, 0x38, 0x3a, 0xdf, 0xb3, 0x56, 0xbe,
86
+      0x8b, 0x7b, 0xd0, 0x24, 0x38, 0xe0, 0x73, 0x58, 0x41, 0x5d, 0x69, 0x67, },
87
+    { 0x46, 0xf6, 0xb4, 0xcc, 0x01, 0x93, 0xe3, 0xa1, 0x9e, 0x7d, 0x3c, 0x65,
88
+      0x55, 0x86, 0xfd, 0x09, 0x8f, 0xf7, 0xb3, 0xc4, 0x6f, 0x41, 0xca, 0x5c, },
89
+    { 0x1a, 0xe7, 0xe2, 0xf3, 0xf9, 0x14, 0x79, 0x94, 0xc0, 0xd3, 0x97, 0x43,
90
+      0x08, 0x7b, 0xb3, 0x84, 0x43, 0x2f, 0x9d, 0x84, 0x3f, 0x21, 0x01, 0x9b, },
91
+    { 0xd3, 0xe3, 0x54, 0xb0, 0xf7, 0x1d, 0xf6, 0x2b, 0x5a, 0x43, 0x4d, 0x04,
92
+      0x83, 0x64, 0x3e, 0x0d, 0x59, 0x2f, 0x61, 0xcb, 0xb1, 0x6a, 0x59, 0x0d, },
93
+    { 0xc8, 0xc1, 0xe9, 0xb8, 0x16, 0x56, 0x99, 0x21, 0x7b, 0x5b, 0x36, 0xb7,
94
+      0xb5, 0x9b, 0xdf, 0x06, 0x49, 0x2c, 0x97, 0xf5, 0x95, 0x48, 0x85, 0x7e, },
95
+    { 0xeb, 0xe5, 0xe6, 0x2e, 0xa4, 0xba, 0xd4, 0x2c, 0xf2, 0x16, 0xe0, 0x8f,
96
+      0x66, 0x23, 0xa9, 0x43, 0x41, 0xce, 0x38, 0x14, 0x84, 0x95, 0x00, 0x53, },
97
+    { 0x66, 0xdb, 0x90, 0xf0, 0x3b, 0x4f, 0xf5, 0x6f, 0xe4, 0x9c, 0x20, 0x89,
98
+      0x35, 0x5e, 0xd2, 0xb2, 0xc3, 0x9e, 0x9f, 0x7f, 0x63, 0xb2, 0x28, 0x81, },
99
+    { 0xbb, 0x20, 0xac, 0xed, 0x2a, 0x04, 0x6a, 0x19, 0x94, 0x98, 0x9b, 0xc8,
100
+      0xff, 0xcd, 0x93, 0xef, 0xc6, 0x0d, 0x56, 0xa7, 0xeb, 0x13, 0xd9, 0x30, },
101
+    { 0xbc, 0xf2, 0x43, 0x82, 0x09, 0x40, 0x8a, 0x87, 0x25, 0x43, 0x6d, 0xe6,
102
+      0xbb, 0xa4, 0xb9, 0x44, 0x58, 0x3f, 0x21, 0x7c, 0x99, 0xbb, 0x3f, 0x24, },
103
+    { 0xec, 0x1a, 0xaa, 0xcd, 0xce, 0xbd, 0x53, 0x11, 0xd2, 0xfb, 0x83, 0xb6,
104
+      0xc3, 0xba, 0xab, 0x4f, 0x62, 0x79, 0xe8, 0x65, 0xa9, 0x92, 0x28, 0x76, },
105
+    { 0xc6, 0x0c, 0x30, 0x03, 0x91, 0x18, 0x2d, 0x7b, 0x79, 0xda, 0xe1, 0xd5,
106
+      0x64, 0x77, 0x9a, 0x12, 0xc5, 0xb1, 0xd7, 0x91, 0x4f, 0x96, 0x4c, 0xa3, },
107
+    { 0xd7, 0x7c, 0x2a, 0xbf, 0xa6, 0xe7, 0x85, 0x7c, 0x45, 0xad, 0xff, 0x12,
108
+      0x94, 0xd8, 0xde, 0xa4, 0x5c, 0x3d, 0x79, 0xa4, 0x44, 0x02, 0x5d, 0x22, },
109
+    { 0x16, 0x19, 0x0d, 0x81, 0x6a, 0x4c, 0xc7, 0xf8, 0xb8, 0xf9, 0x4e, 0xcd,
110
+      0x2c, 0x9e, 0x90, 0x84, 0xb2, 0x08, 0x25, 0x60, 0xe1, 0x1e, 0xae, 0x18, },
111
+    { 0xe9, 0x7c, 0x58, 0x26, 0x1b, 0x51, 0x9e, 0x49, 0x82, 0x60, 0x61, 0xfc,
112
+      0xa0, 0xa0, 0x1b, 0xcd, 0xf5, 0x05, 0xd6, 0xa6, 0x6d, 0x07, 0x88, 0xa3, },
113
+    { 0x2b, 0x97, 0x11, 0x8b, 0xd9, 0x4e, 0xd9, 0xdf, 0x20, 0xe3, 0x9c, 0x10,
114
+      0xe6, 0xa1, 0x35, 0x21, 0x11, 0xf9, 0x13, 0x0d, 0x0b, 0x24, 0x65, 0xb2, },
115
+    { 0x53, 0x6a, 0x4c, 0x54, 0xac, 0x8b, 0x9b, 0xb8, 0x97, 0x29, 0xfc, 0x60,
116
+      0x2c, 0x5b, 0x3a, 0x85, 0x68, 0xb5, 0xaa, 0x6a, 0x44, 0xcd, 0x3f, 0xa7, },
117
+};
118
+
119
+int ff_rtmpe_gen_pub_key(URLContext *h, uint8_t *buf)
120
+{
121
+    RTMPEContext *rt = h->priv_data;
122
+    int offset, ret;
123
+
124
+    if (!(rt->dh = ff_dh_init(1024)))
125
+        return AVERROR(ENOMEM);
126
+
127
+    offset = ff_rtmp_calc_digest_pos(buf, 768, 632, 8);
128
+    if (offset < 0)
129
+        return offset;
130
+
131
+    /* generate a Diffie-Hellman public key */
132
+    if ((ret = ff_dh_generate_public_key(rt->dh)) < 0)
133
+        return ret;
134
+
135
+    /* write the public key into the handshake buffer */
136
+    if ((ret = ff_dh_write_public_key(rt->dh, buf + offset, 128)) < 0)
137
+        return ret;
138
+
139
+    return 0;
140
+}
141
+
142
+int ff_rtmpe_compute_secret_key(URLContext *h, const uint8_t *serverdata,
143
+                                const uint8_t *clientdata, int type)
144
+{
145
+    RTMPEContext *rt = h->priv_data;
146
+    uint8_t secret_key[128], digest[32];
147
+    int server_pos, client_pos;
148
+    int ret;
149
+
150
+    if (type) {
151
+        if ((server_pos = ff_rtmp_calc_digest_pos(serverdata, 1532, 632, 772)) < 0)
152
+            return server_pos;
153
+    } else {
154
+        if ((server_pos = ff_rtmp_calc_digest_pos(serverdata, 768, 632, 8)) < 0)
155
+            return server_pos;
156
+    }
157
+
158
+    if ((client_pos = ff_rtmp_calc_digest_pos(clientdata, 768, 632, 8)) < 0)
159
+        return client_pos;
160
+
161
+    /* compute the shared secret in order to compute RC4 keys */
162
+    if ((ret = ff_dh_compute_shared_secret_key(rt->dh, serverdata + server_pos,
163
+                                               128, secret_key)) < 0)
164
+        return ret;
165
+
166
+    /* set output key */
167
+    if ((ret = ff_rtmp_calc_digest(serverdata + server_pos, 128, 0, secret_key,
168
+                                   128, digest)) < 0)
169
+        return ret;
170
+    av_rc4_init(&rt->key_out, digest, 16 * 8, 1);
171
+
172
+    /* set input key */
173
+    if ((ret = ff_rtmp_calc_digest(clientdata + client_pos, 128, 0, secret_key,
174
+                                   128, digest)) < 0)
175
+        return ret;
176
+    av_rc4_init(&rt->key_in, digest, 16 * 8, 1);
177
+
178
+    return 0;
179
+}
180
+
181
+static void rtmpe8_sig(const uint8_t *in, uint8_t *out, int key_id)
182
+{
183
+    struct AVXTEA ctx;
184
+
185
+    av_xtea_init(&ctx, rtmpe8_keys[key_id]);
186
+    av_xtea_crypt(&ctx, out, in, 1, NULL, 0);
187
+}
188
+
189
+static void rtmpe9_sig(const uint8_t *in, uint8_t *out, int key_id)
190
+{
191
+    struct AVBlowfish ctx;
192
+    uint32_t xl, xr;
193
+
194
+    xl = AV_RL32(in);
195
+    xr = AV_RL32(in + 4);
196
+
197
+    av_blowfish_init(&ctx, rtmpe9_keys[key_id], 24);
198
+    av_blowfish_crypt_ecb(&ctx, &xl, &xr, 0);
199
+
200
+    AV_WL32(out, xl);
201
+    AV_WL32(out + 4, xr);
202
+}
203
+
204
+void ff_rtmpe_encrypt_sig(URLContext *h, uint8_t *sig, const uint8_t *digest,
205
+                          int type)
206
+{
207
+    int i;
208
+
209
+    for (i = 0; i < 32; i += 8) {
210
+        if (type == 8) {
211
+            /* RTMPE type 8 uses XTEA on the signature */
212
+            rtmpe8_sig(sig + i, sig + i, digest[i] % 15);
213
+        } else if (type == 9) {
214
+            /* RTMPE type 9 uses Blowfish on the signature */
215
+            rtmpe9_sig(sig + i, sig + i, digest[i] % 15);
216
+        }
217
+    }
218
+}
219
+
220
+int ff_rtmpe_update_keystream(URLContext *h)
221
+{
222
+    RTMPEContext *rt = h->priv_data;
223
+    char buf[RTMP_HANDSHAKE_PACKET_SIZE];
224
+
225
+    /* skip past 1536 bytes of the RC4 bytestream */
226
+    av_rc4_crypt(&rt->key_in, buf, NULL, sizeof(buf), NULL, 1);
227
+    av_rc4_crypt(&rt->key_out, buf, NULL, sizeof(buf), NULL, 1);
228
+
229
+    /* the next requests will be encrypted using RC4 keys */
230
+    rt->handshaked = 1;
231
+
232
+    return 0;
233
+}
234
+
235
+static int rtmpe_close(URLContext *h)
236
+{
237
+    RTMPEContext *rt = h->priv_data;
238
+
239
+    ff_dh_free(rt->dh);
240
+    ffurl_close(rt->stream);
241
+
242
+    return 0;
243
+}
244
+
245
+static int rtmpe_open(URLContext *h, const char *uri, int flags)
246
+{
247
+    RTMPEContext *rt = h->priv_data;
248
+    char host[256], url[1024];
249
+    int ret, port;
250
+
251
+    av_url_split(NULL, 0, NULL, 0, host, sizeof(host), &port, NULL, 0, uri);
252
+
253
+    if (rt->tunneling) {
254
+        if (port < 0)
255
+            port = 80;
256
+        ff_url_join(url, sizeof(url), "ffrtmphttp", NULL, host, port, NULL);
257
+    } else {
258
+        if (port < 0)
259
+            port = 1935;
260
+        ff_url_join(url, sizeof(url), "tcp", NULL, host, port, NULL);
261
+    }
262
+
263
+    /* open the tcp or ffrtmphttp connection */
264
+    if ((ret = ffurl_open(&rt->stream, url, AVIO_FLAG_READ_WRITE,
265
+                          &h->interrupt_callback, NULL)) < 0) {
266
+        rtmpe_close(h);
267
+        return ret;
268
+    }
269
+
270
+    return 0;
271
+}
272
+
273
+static int rtmpe_read(URLContext *h, uint8_t *buf, int size)
274
+{
275
+    RTMPEContext *rt = h->priv_data;
276
+    int ret;
277
+
278
+    rt->stream->flags |= h->flags & AVIO_FLAG_NONBLOCK;
279
+    ret = ffurl_read(rt->stream, buf, size);
280
+    rt->stream->flags &= ~AVIO_FLAG_NONBLOCK;
281
+
282
+    if (ret < 0 && ret != AVERROR_EOF)
283
+        return ret;
284
+
285
+    if (rt->handshaked && ret > 0) {
286
+        /* decrypt data received from the server */
287
+        av_rc4_crypt(&rt->key_in, buf, buf, ret, NULL, 1);
288
+    }
289
+
290
+    return ret;
291
+}
292
+
293
+static int rtmpe_write(URLContext *h, const uint8_t *buf, int size)
294
+{
295
+    RTMPEContext *rt = h->priv_data;
296
+    int ret;
297
+
298
+    if (rt->handshaked) {
299
+        /* encrypt data to send to the server */
300
+        av_rc4_crypt(&rt->key_out, buf, buf, size, NULL, 1);
301
+    }
302
+
303
+    if ((ret = ffurl_write(rt->stream, buf, size)) < 0)
304
+        return ret;
305
+
306
+    return size;
307
+}
308
+
309
+#define OFFSET(x) offsetof(RTMPEContext, x)
310
+#define DEC AV_OPT_FLAG_DECODING_PARAM
311
+
312
+static const AVOption ffrtmpcrypt_options[] = {
313
+    {"ffrtmpcrypt_tunneling", "Use a HTTP tunneling connection (RTMPTE).", OFFSET(tunneling), AV_OPT_TYPE_INT, {0}, 0, 1, DEC},
314
+    { NULL },
315
+};
316
+
317
+static const AVClass ffrtmpcrypt_class = {
318
+    .class_name = "ffrtmpcrypt",
319
+    .item_name  = av_default_item_name,
320
+    .option     = ffrtmpcrypt_options,
321
+    .version    = LIBAVUTIL_VERSION_INT,
322
+};
323
+
324
+URLProtocol ff_ffrtmpcrypt_protocol = {
325
+    .name            = "ffrtmpcrypt",
326
+    .url_open        = rtmpe_open,
327
+    .url_read        = rtmpe_read,
328
+    .url_write       = rtmpe_write,
329
+    .url_close       = rtmpe_close,
330
+    .priv_data_size  = sizeof(RTMPEContext),
331
+    .flags           = URL_PROTOCOL_FLAG_NETWORK,
332
+    .priv_data_class = &ffrtmpcrypt_class,
333
+};
0 334
new file mode 100644
... ...
@@ -0,0 +1,69 @@
0
+/*
1
+ * RTMPE encryption utilities
2
+ * Copyright (c) 2012 Samuel Pitoiset
3
+ *
4
+ * This file is part of FFmpeg.
5
+ *
6
+ * FFmpeg is free software; you can redistribute it and/or
7
+ * modify it under the terms of the GNU Lesser General Public
8
+ * License as published by the Free Software Foundation; either
9
+ * version 2.1 of the License, or (at your option) any later version.
10
+ *
11
+ * FFmpeg is distributed in the hope that it will be useful,
12
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
+ * Lesser General Public License for more details.
15
+ *
16
+ * You should have received a copy of the GNU Lesser General Public
17
+ * License along with FFmpeg; if not, write to the Free Software
18
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
+ */
20
+
21
+#ifndef AVFORMAT_RTMPCRYPT_H
22
+#define AVFORMAT_RTMPCRYPT_H
23
+
24
+#include <stdint.h>
25
+
26
+#include "url.h"
27
+
28
+/**
29
+ * Initialize the Diffie-Hellman context and generate the public key.
30
+ *
31
+ * @param h     a URLContext
32
+ * @param buf   handshake data (1536 bytes)
33
+ * @return zero on success, negative value otherwise
34
+ */
35
+int ff_rtmpe_gen_pub_key(URLContext *h, uint8_t *buf);
36
+
37
+/**
38
+ * Compute the shared secret key and initialize the RC4 encryption.
39
+ *
40
+ * @param h             a URLContext
41
+ * @param serverdata    server data (1536 bytes)
42
+ * @param clientdata    client data (1536 bytes)
43
+ * @param type          the position of the server digest
44
+ * @return zero on success, negative value otherwise
45
+ */
46
+int ff_rtmpe_compute_secret_key(URLContext *h, const uint8_t *serverdata,
47
+                                const uint8_t *clientdata, int type);
48
+
49
+/**
50
+ * Encrypt the signature.
51
+ *
52
+ * @param h             a URLContext
53
+ * @param signature     the signature to encrypt
54
+ * @param digest        the digest used for finding the encryption key
55
+ * @param type          type of encryption (8 for XTEA, 9 for Blowfish)
56
+ */
57
+void ff_rtmpe_encrypt_sig(URLContext *h, uint8_t *signature,
58
+                          const uint8_t *digest, int type);
59
+
60
+/**
61
+ * Update the keystream and set RC4 keys for encryption.
62
+ *
63
+ * @param h a URLContext
64
+ * @return zero on success, negative value otherwise
65
+ */
66
+int ff_rtmpe_update_keystream(URLContext *h);
67
+
68
+#endif /* AVFORMAT_RTMPCRYPT_H */
0 69
new file mode 100644
... ...
@@ -0,0 +1,329 @@
0
+/*
1
+ * RTMP Diffie-Hellman utilities
2
+ * Copyright (c) 2012 Samuel Pitoiset
3
+ *
4
+ * This file is part of FFmpeg.
5
+ *
6
+ * FFmpeg is free software; you can redistribute it and/or
7
+ * modify it under the terms of the GNU Lesser General Public
8
+ * License as published by the Free Software Foundation; either
9
+ * version 2.1 of the License, or (at your option) any later version.
10
+ *
11
+ * FFmpeg is distributed in the hope that it will be useful,
12
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
+ * Lesser General Public License for more details.
15
+ *
16
+ * You should have received a copy of the GNU Lesser General Public
17
+ * License along with FFmpeg; if not, write to the Free Software
18
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
+ */
20
+
21
+/**
22
+ * @file
23
+ * RTMP Diffie-Hellman utilities
24
+ */
25
+
26
+#include "config.h"
27
+#include "rtmpdh.h"
28
+
29
+#define P1024                                          \
30
+    "FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1" \
31
+    "29024E088A67CC74020BBEA63B139B22514A08798E3404DD" \
32
+    "EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245" \
33
+    "E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED" \
34
+    "EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE65381" \
35
+    "FFFFFFFFFFFFFFFF"
36
+
37
+#define Q1024                                          \
38
+    "7FFFFFFFFFFFFFFFE487ED5110B4611A62633145C06E0E68" \
39
+    "948127044533E63A0105DF531D89CD9128A5043CC71A026E" \
40
+    "F7CA8CD9E69D218D98158536F92F8A1BA7F09AB6B6A8E122" \
41
+    "F242DABB312F3F637A262174D31BF6B585FFAE5B7A035BF6" \
42
+    "F71C35FDAD44CFD2D74F9208BE258FF324943328F67329C0" \
43
+    "FFFFFFFFFFFFFFFF"
44
+
45
+#if CONFIG_NETTLE || CONFIG_GCRYPT
46
+#if CONFIG_NETTLE
47
+#define bn_new(bn)                      \
48
+    do {                                \
49
+        bn = av_malloc(sizeof(*bn));    \
50
+        if (bn)                         \
51
+            mpz_init2(bn, 1);           \
52
+    } while (0)
53
+#define bn_free(bn)     \
54
+    do {                \
55
+        mpz_clear(bn);  \
56
+        av_free(bn);    \
57
+    } while (0)
58
+#define bn_set_word(bn, w)          mpz_set_ui(bn, w)
59
+#define bn_cmp(a, b)                mpz_cmp(a, b)
60
+#define bn_copy(to, from)           mpz_set(to, from)
61
+#define bn_sub_word(bn, w)          mpz_sub_ui(bn, bn, w)
62
+#define bn_cmp_1(bn)                mpz_cmp_ui(bn, 1)
63
+#define bn_num_bytes(bn)            (mpz_sizeinbase(bn, 2) + 7) / 8
64
+#define bn_bn2bin(bn, buf, len)     nettle_mpz_get_str_256(len, buf, bn)
65
+#define bn_bin2bn(bn, buf, len)                     \
66
+    do {                                            \
67
+        bn_new(bn);                                 \
68
+        if (bn)                                     \
69
+            nettle_mpz_set_str_256_u(bn, len, buf); \
70
+    } while (0)
71
+#define bn_hex2bn(bn, buf, ret)                     \
72
+    do {                                            \
73
+        bn_new(bn);                                 \
74
+        if (bn)                                     \
75
+            ret = (mpz_set_str(bn, buf, 16) == 0);  \
76
+    } while (0)
77
+#define bn_modexp(bn, y, q, p)      mpz_powm(bn, y, q, p)
78
+#define bn_random(bn, num_bytes)    mpz_random(bn, num_bytes);
79
+#elif CONFIG_GCRYPT
80
+#define bn_new(bn)                  bn = gcry_mpi_new(1)
81
+#define bn_free(bn)                 gcry_mpi_release(bn)
82
+#define bn_set_word(bn, w)          gcry_mpi_set_ui(bn, w)
83
+#define bn_cmp(a, b)                gcry_mpi_cmp(a, b)
84
+#define bn_copy(to, from)           gcry_mpi_set(to, from)
85
+#define bn_sub_word(bn, w)          gcry_mpi_sub_ui(bn, bn, w)
86
+#define bn_cmp_1(bn)                gcry_mpi_cmp_ui(bn, 1)
87
+#define bn_num_bytes(bn)            (gcry_mpi_get_nbits(bn) + 7) / 8
88
+#define bn_bn2bin(bn, buf, len)     gcry_mpi_print(GCRYMPI_FMT_USG, buf, len, NULL, bn)
89
+#define bn_bin2bn(bn, buf, len)     gcry_mpi_scan(&bn, GCRYMPI_FMT_USG, buf, len, NULL)
90
+#define bn_hex2bn(bn, buf, ret)     ret = (gcry_mpi_scan(&bn, GCRYMPI_FMT_HEX, buf, 0, 0) == 0)
91
+#define bn_modexp(bn, y, q, p)      gcry_mpi_powm(bn, y, q, p)
92
+#define bn_random(bn, num_bytes)    gcry_mpi_randomize(bn, num_bytes, GCRY_WEAK_RANDOM)
93
+#endif
94
+
95
+#define MAX_BYTES 18000
96
+
97
+#define dh_new()                    av_malloc(sizeof(FF_DH))
98
+
99
+static FFBigNum dh_generate_key(FF_DH *dh)
100
+{
101
+    int num_bytes;
102
+
103
+    num_bytes = bn_num_bytes(dh->p) - 1;
104
+    if (num_bytes <= 0 || num_bytes > MAX_BYTES)
105
+        return NULL;
106
+
107
+    bn_new(dh->priv_key);
108
+    if (!dh->priv_key)
109
+        return NULL;
110
+    bn_random(dh->priv_key, num_bytes);
111
+
112
+    bn_new(dh->pub_key);
113
+    if (!dh->pub_key) {
114
+        bn_free(dh->priv_key);
115
+        return NULL;
116
+    }
117
+
118
+    bn_modexp(dh->pub_key, dh->g, dh->priv_key, dh->p);
119
+
120
+    return dh->pub_key;
121
+}
122
+
123
+static int dh_compute_key(FF_DH *dh, FFBigNum pub_key_bn,
124
+                          uint32_t pub_key_len, uint8_t *secret_key)
125
+{
126
+    FFBigNum k;
127
+    int num_bytes;
128
+
129
+    num_bytes = bn_num_bytes(dh->p);
130
+    if (num_bytes <= 0 || num_bytes > MAX_BYTES)
131
+        return -1;
132
+
133
+    bn_new(k);
134
+    if (!k)
135
+        return -1;
136
+
137
+    bn_modexp(k, pub_key_bn, dh->priv_key, dh->p);
138
+    bn_bn2bin(k, secret_key, pub_key_len);
139
+    bn_free(k);
140
+
141
+    /* return the length of the shared secret key like DH_compute_key */
142
+    return pub_key_len;
143
+}
144
+
145
+void ff_dh_free(FF_DH *dh)
146
+{
147
+    bn_free(dh->p);
148
+    bn_free(dh->g);
149
+    bn_free(dh->pub_key);
150
+    bn_free(dh->priv_key);
151
+    av_free(dh);
152
+}
153
+#elif CONFIG_OPENSSL
154
+#define bn_new(bn)                  bn = BN_new()
155
+#define bn_free(bn)                 BN_free(bn)
156
+#define bn_set_word(bn, w)          BN_set_word(bn, w)
157
+#define bn_cmp(a, b)                BN_cmp(a, b)
158
+#define bn_copy(to, from)           BN_copy(to, from)
159
+#define bn_sub_word(bn, w)          BN_sub_word(bn, w)
160
+#define bn_cmp_1(bn)                BN_cmp(bn, BN_value_one())
161
+#define bn_num_bytes(bn)            BN_num_bytes(bn)
162
+#define bn_bn2bin(bn, buf, len)     BN_bn2bin(bn, buf)
163
+#define bn_bin2bn(bn, buf, len)     bn = BN_bin2bn(buf, len, 0)
164
+#define bn_hex2bn(bn, buf, ret)     ret = BN_hex2bn(&bn, buf)
165
+#define bn_modexp(bn, y, q, p)               \
166
+    do {                                     \
167
+        BN_CTX *ctx = BN_CTX_new();          \
168
+        if (!ctx)                            \
169
+            return AVERROR(ENOMEM);          \
170
+        if (!BN_mod_exp(bn, y, q, p, ctx)) { \
171
+            BN_CTX_free(ctx);                \
172
+            return AVERROR(EINVAL);          \
173
+        }                                    \
174
+        BN_CTX_free(ctx);                    \
175
+    } while (0)
176
+
177
+#define dh_new()                                DH_new()
178
+#define dh_generate_key(dh)                     DH_generate_key(dh)
179
+#define dh_compute_key(dh, pub, len, secret)    DH_compute_key(secret, pub, dh)
180
+
181
+void ff_dh_free(FF_DH *dh)
182
+{
183
+    DH_free(dh);
184
+}
185
+#endif
186
+
187
+static int dh_is_valid_public_key(FFBigNum y, FFBigNum p, FFBigNum q)
188
+{
189
+    FFBigNum bn = NULL;
190
+    int ret = AVERROR(EINVAL);
191
+
192
+    bn_new(bn);
193
+    if (!bn)
194
+        return AVERROR(ENOMEM);
195
+
196
+    /* y must lie in [2, p - 1] */
197
+    bn_set_word(bn, 1);
198
+    if (!bn_cmp(y, bn))
199
+        goto fail;
200
+
201
+    /* bn = p - 1 */
202
+    bn_copy(bn, p);
203
+    bn_sub_word(bn, 1);
204
+    if (!bn_cmp(y, bn))
205
+        goto fail;
206
+
207
+    /* Verify with Sophie-Germain prime
208
+     *
209
+     * This is a nice test to make sure the public key position is calculated
210
+     * correctly. This test will fail in about 50% of the cases if applied to
211
+     * random data.
212
+     */
213
+    /* y must fulfill y^q mod p = 1 */
214
+    bn_modexp(bn, y, q, p);
215
+
216
+    if (bn_cmp_1(bn))
217
+        goto fail;
218
+
219
+    ret = 0;
220
+fail:
221
+    bn_free(bn);
222
+
223
+    return ret;
224
+}
225
+
226
+av_cold FF_DH *ff_dh_init(int key_len)
227
+{
228
+    FF_DH *dh;
229
+    int ret;
230
+
231
+    if (!(dh = dh_new()))
232
+        return NULL;
233
+
234
+    bn_new(dh->g);
235
+    if (!dh->g)
236
+        goto fail;
237
+
238
+    bn_hex2bn(dh->p, P1024, ret);
239
+    if (!ret)
240
+        goto fail;
241
+
242
+    bn_set_word(dh->g, 2);
243
+    dh->length = key_len;
244
+
245
+    return dh;
246
+
247
+fail:
248
+    ff_dh_free(dh);
249
+
250
+    return NULL;
251
+}
252
+
253
+int ff_dh_generate_public_key(FF_DH *dh)
254
+{
255
+    int ret = 0;
256
+
257
+    while (!ret) {
258
+        FFBigNum q1 = NULL;
259
+
260
+        if (!dh_generate_key(dh))
261
+            return AVERROR(EINVAL);
262
+
263
+        bn_hex2bn(q1, Q1024, ret);
264
+        if (!ret)
265
+            return AVERROR(ENOMEM);
266
+
267
+        ret = dh_is_valid_public_key(dh->pub_key, dh->p, q1);
268
+        bn_free(q1);
269
+
270
+        if (!ret) {
271
+            /* the public key is valid */
272
+            break;
273
+        }
274
+    }
275
+
276
+    return ret;
277
+}
278
+
279
+int ff_dh_write_public_key(FF_DH *dh, uint8_t *pub_key, int pub_key_len)
280
+{
281
+    int len;
282
+
283
+    /* compute the length of the public key */
284
+    len = bn_num_bytes(dh->pub_key);
285
+    if (len <= 0 || len > pub_key_len)
286
+        return AVERROR(EINVAL);
287
+
288
+    /* convert the public key value into big-endian form */
289
+    memset(pub_key, 0, pub_key_len);
290
+    bn_bn2bin(dh->pub_key, pub_key + pub_key_len - len, len);
291
+
292
+    return 0;
293
+}
294
+
295
+int ff_dh_compute_shared_secret_key(FF_DH *dh, const uint8_t *pub_key,
296
+                                    int pub_key_len, uint8_t *secret_key)
297
+{
298
+    FFBigNum q1 = NULL, pub_key_bn = NULL;
299
+    int ret;
300
+
301
+    /* convert the big-endian form of the public key into a bignum */
302
+    bn_bin2bn(pub_key_bn, pub_key, pub_key_len);
303
+    if (!pub_key_bn)
304
+        return AVERROR(ENOMEM);
305
+
306
+    /* convert the string containing a hexadecimal number into a bignum */
307
+    bn_hex2bn(q1, Q1024, ret);
308
+    if (!ret) {
309
+        ret = AVERROR(ENOMEM);
310
+        goto fail;
311
+    }
312
+
313
+    /* when the public key is valid we have to compute the shared secret key */
314
+    if ((ret = dh_is_valid_public_key(pub_key_bn, dh->p, q1)) < 0) {
315
+        goto fail;
316
+    } else if ((ret = dh_compute_key(dh, pub_key_bn, pub_key_len,
317
+                                     secret_key)) < 0) {
318
+        ret = AVERROR(EINVAL);
319
+        goto fail;
320
+    }
321
+
322
+fail:
323
+    bn_free(pub_key_bn);
324
+    bn_free(q1);
325
+
326
+    return ret;
327
+}
328
+
0 329
new file mode 100644
... ...
@@ -0,0 +1,102 @@
0
+/*
1
+ * RTMP Diffie-Hellman utilities
2
+ * Copyright (c) 2012 Samuel Pitoiset
3
+ *
4
+ * This file is part of FFmpeg.
5
+ *
6
+ * FFmpeg is free software; you can redistribute it and/or
7
+ * modify it under the terms of the GNU Lesser General Public
8
+ * License as published by the Free Software Foundation; either
9
+ * version 2.1 of the License, or (at your option) any later version.
10
+ *
11
+ * FFmpeg is distributed in the hope that it will be useful,
12
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
+ * Lesser General Public License for more details.
15
+ *
16
+ * You should have received a copy of the GNU Lesser General Public
17
+ * License along with FFmpeg; if not, write to the Free Software
18
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
+ */
20
+
21
+#ifndef AVFORMAT_RTMPDH_H
22
+#define AVFORMAT_RTMPDH_H
23
+
24
+#include "avformat.h"
25
+#include "config.h"
26
+
27
+#if CONFIG_NETTLE || CONFIG_GCRYPT
28
+#if CONFIG_NETTLE
29
+#include <gmp.h>
30
+#include <nettle/bignum.h>
31
+
32
+typedef mpz_ptr FFBigNum;
33
+#elif CONFIG_GCRYPT
34
+#include <gcrypt.h>
35
+
36
+typedef gcry_mpi_t FFBigNum;
37
+#endif
38
+
39
+typedef struct FF_DH {
40
+  FFBigNum p;
41
+  FFBigNum g;
42
+  FFBigNum pub_key;
43
+  FFBigNum priv_key;
44
+  long length;
45
+} FF_DH;
46
+
47
+#elif CONFIG_OPENSSL
48
+#include <openssl/bn.h>
49
+#include <openssl/dh.h>
50
+
51
+typedef BIGNUM *FFBigNum;
52
+typedef DH FF_DH;
53
+#endif
54
+
55
+/**
56
+ * Initialize a Diffie-Hellman context.
57
+ *
58
+ * @param key_len length of the key
59
+ * @return a new Diffie-Hellman context on success, NULL otherwise
60
+ */
61
+FF_DH *ff_dh_init(int key_len);
62
+
63
+/**
64
+ * Free a Diffie-Hellman context.
65
+ *
66
+ * @param dh a Diffie-Hellman context to free
67
+ */
68
+void ff_dh_free(FF_DH *dh);
69
+
70
+/**
71
+ * Generate a public key.
72
+ *
73
+ * @param dh a Diffie-Hellman context
74
+ * @return zero on success, negative value otherwise
75
+ */
76
+int ff_dh_generate_public_key(FF_DH *dh);
77
+
78
+/**
79
+ * Write the public key into the given buffer.
80
+ *
81
+ * @param dh            a Diffie-Hellman context, containing the public key to write
82
+ * @param pub_key       the buffer where the public key is written
83
+ * @param pub_key_len   the length of the buffer
84
+ * @return zero on success, negative value otherwise
85
+ */
86
+int ff_dh_write_public_key(FF_DH *dh, uint8_t *pub_key, int pub_key_len);
87
+
88
+/**
89
+ * Compute the shared secret key from the private FF_DH value and the
90
+ * other party's public value.
91
+ *
92
+ * @param dh            a Diffie-Hellman context, containing the private key
93
+ * @param pub_key       the buffer containing the public key
94
+ * @param pub_key_len   the length of the buffer
95
+ * @param secret_key    the buffer where the secret key is written
96
+ * @return length of the shared secret key on success, negative value otherwise
97
+ */
98
+int ff_dh_compute_shared_secret_key(FF_DH *dh, const uint8_t *pub_key,
99
+                                    int pub_key_len, uint8_t *secret_key);
100
+
101
+#endif /* AVFORMAT_RTMPDH_H */
... ...
@@ -37,6 +37,7 @@
37 37
 
38 38
 #include "flv.h"
39 39
 #include "rtmp.h"
40
+#include "rtmpcrypt.h"
40 41
 #include "rtmppkt.h"
41 42
 #include "url.h"
42 43
 
... ...
@@ -92,6 +93,7 @@ typedef struct RTMPContext {
92 92
     int           server_bw;                  ///< server bandwidth
93 93
     int           client_buffer_time;         ///< client buffer time in ms
94 94
     int           flush_interval;             ///< number of packets flushed in the same request (RTMPT only)
95
+    int           encrypted;                  ///< use an encrypted connection (RTMPE only)
95 96
 } RTMPContext;
96 97
 
97 98
 #define PLAYER_KEY_OPEN_PART_LEN 30   ///< length of partial key used for first client digest signing
... ...
@@ -590,23 +592,8 @@ static int gen_bytes_read(URLContext *s, RTMPContext *rt, uint32_t ts)
590 590
     return ret;
591 591
 }
592 592
 
593
-//TODO: Move HMAC code somewhere. Eventually.
594
-#define HMAC_IPAD_VAL 0x36
595
-#define HMAC_OPAD_VAL 0x5C
596
-
597
-/**
598
- * Calculate HMAC-SHA2 digest for RTMP handshake packets.
599
- *
600
- * @param src    input buffer
601
- * @param len    input buffer length (should be 1536)
602
- * @param gap    offset in buffer where 32 bytes should not be taken into account
603
- *               when calculating digest (since it will be used to store that digest)
604
- * @param key    digest key
605
- * @param keylen digest key length
606
- * @param dst    buffer where calculated digest will be stored (32 bytes)
607
- */
608
-static int rtmp_calc_digest(const uint8_t *src, int len, int gap,
609
-                            const uint8_t *key, int keylen, uint8_t *dst)
593
+int ff_rtmp_calc_digest(const uint8_t *src, int len, int gap,
594
+                        const uint8_t *key, int keylen, uint8_t *dst)
610 595
 {
611 596
     struct AVSHA *sha;
612 597
     uint8_t hmac_buf[64+32] = {0};
... ...
@@ -647,25 +634,38 @@ static int rtmp_calc_digest(const uint8_t *src, int len, int gap,
647 647
     return 0;
648 648
 }
649 649
 
650
+int ff_rtmp_calc_digest_pos(const uint8_t *buf, int off, int mod_val,
651
+                            int add_val)
652
+{
653
+    int i, digest_pos = 0;
654
+
655
+    for (i = 0; i < 4; i++)
656
+        digest_pos += buf[i + off];
657
+    digest_pos = digest_pos % mod_val + add_val;
658
+
659
+    return digest_pos;
660
+}
661
+
650 662
 /**
651 663
  * Put HMAC-SHA2 digest of packet data (except for the bytes where this digest
652 664
  * will be stored) into that packet.
653 665
  *
654 666
  * @param buf handshake data (1536 bytes)
667
+ * @param encrypted use an encrypted connection (RTMPE)
655 668
  * @return offset to the digest inside input data
656 669
  */
657
-static int rtmp_handshake_imprint_with_digest(uint8_t *buf)
670
+static int rtmp_handshake_imprint_with_digest(uint8_t *buf, int encrypted)
658 671
 {
659
-    int i, digest_pos = 0;
660
-    int ret;
672
+    int ret, digest_pos;
661 673
 
662
-    for (i = 8; i < 12; i++)
663
-        digest_pos += buf[i];
664
-    digest_pos = (digest_pos % 728) + 12;
674
+    if (encrypted)
675
+        digest_pos = ff_rtmp_calc_digest_pos(buf, 772, 728, 776);
676
+    else
677
+        digest_pos = ff_rtmp_calc_digest_pos(buf, 8, 728, 12);
665 678
 
666
-    ret = rtmp_calc_digest(buf, RTMP_HANDSHAKE_PACKET_SIZE, digest_pos,
667
-                           rtmp_player_key, PLAYER_KEY_OPEN_PART_LEN,
668
-                           buf + digest_pos);
679
+    ret = ff_rtmp_calc_digest(buf, RTMP_HANDSHAKE_PACKET_SIZE, digest_pos,
680
+                              rtmp_player_key, PLAYER_KEY_OPEN_PART_LEN,
681
+                              buf + digest_pos);
669 682
     if (ret < 0)
670 683
         return ret;
671 684
 
... ...
@@ -681,17 +681,14 @@ static int rtmp_handshake_imprint_with_digest(uint8_t *buf)
681 681
  */
682 682
 static int rtmp_validate_digest(uint8_t *buf, int off)
683 683
 {
684
-    int i, digest_pos = 0;
685 684
     uint8_t digest[32];
686
-    int ret;
685
+    int ret, digest_pos;
687 686
 
688
-    for (i = 0; i < 4; i++)
689
-        digest_pos += buf[i + off];
690
-    digest_pos = (digest_pos % 728) + off + 4;
687
+    digest_pos = ff_rtmp_calc_digest_pos(buf, off, 728, off + 4);
691 688
 
692
-    ret = rtmp_calc_digest(buf, RTMP_HANDSHAKE_PACKET_SIZE, digest_pos,
693
-                           rtmp_server_key, SERVER_KEY_OPEN_PART_LEN,
694
-                           digest);
689
+    ret = ff_rtmp_calc_digest(buf, RTMP_HANDSHAKE_PACKET_SIZE, digest_pos,
690
+                              rtmp_server_key, SERVER_KEY_OPEN_PART_LEN,
691
+                              digest);
695 692
     if (ret < 0)
696 693
         return ret;
697 694
 
... ...
@@ -721,8 +718,9 @@ static int rtmp_handshake(URLContext *s, RTMPContext *rt)
721 721
     uint8_t serverdata[RTMP_HANDSHAKE_PACKET_SIZE+1];
722 722
     int i;
723 723
     int server_pos, client_pos;
724
-    uint8_t digest[32];
725
-    int ret;
724
+    uint8_t digest[32], signature[32];
725
+    int encrypted = rt->encrypted && CONFIG_FFRTMPCRYPT_PROTOCOL;
726
+    int ret, type = 0;
726 727
 
727 728
     av_log(s, AV_LOG_DEBUG, "Handshaking...\n");
728 729
 
... ...
@@ -730,7 +728,24 @@ static int rtmp_handshake(URLContext *s, RTMPContext *rt)
730 730
     // generate handshake packet - 1536 bytes of pseudorandom data
731 731
     for (i = 9; i <= RTMP_HANDSHAKE_PACKET_SIZE; i++)
732 732
         tosend[i] = av_lfg_get(&rnd) >> 24;
733
-    client_pos = rtmp_handshake_imprint_with_digest(tosend + 1);
733
+
734
+    if (encrypted) {
735
+        /* When the client wants to use RTMPE, we have to change the command
736
+         * byte to 0x06 which means to use encrypted data and we have to set
737
+         * the flash version to at least 9.0.115.0. */
738
+        tosend[0] = 6;
739
+        tosend[5] = 128;
740
+        tosend[6] = 0;
741
+        tosend[7] = 3;
742
+        tosend[8] = 2;
743
+
744
+        /* Initialize the Diffie-Hellmann context and generate the public key
745
+         * to send to the server. */
746
+        if ((ret = ff_rtmpe_gen_pub_key(rt->stream, tosend + 1)) < 0)
747
+            return ret;
748
+    }
749
+
750
+    client_pos = rtmp_handshake_imprint_with_digest(tosend + 1, encrypted);
734 751
     if (client_pos < 0)
735 752
         return client_pos;
736 753
 
... ...
@@ -752,6 +767,7 @@ static int rtmp_handshake(URLContext *s, RTMPContext *rt)
752 752
         return ret;
753 753
     }
754 754
 
755
+    av_log(s, AV_LOG_DEBUG, "Type answer %d\n", serverdata[0]);
755 756
     av_log(s, AV_LOG_DEBUG, "Server version %d.%d.%d.%d\n",
756 757
            serverdata[5], serverdata[6], serverdata[7], serverdata[8]);
757 758
 
... ...
@@ -761,6 +777,7 @@ static int rtmp_handshake(URLContext *s, RTMPContext *rt)
761 761
             return server_pos;
762 762
 
763 763
         if (!server_pos) {
764
+            type = 1;
764 765
             server_pos = rtmp_validate_digest(serverdata + 1, 8);
765 766
             if (server_pos < 0)
766 767
                 return server_pos;
... ...
@@ -771,43 +788,88 @@ static int rtmp_handshake(URLContext *s, RTMPContext *rt)
771 771
             }
772 772
         }
773 773
 
774
-        ret = rtmp_calc_digest(tosend + 1 + client_pos, 32, 0, rtmp_server_key,
775
-                               sizeof(rtmp_server_key), digest);
774
+        ret = ff_rtmp_calc_digest(tosend + 1 + client_pos, 32, 0,
775
+                                  rtmp_server_key, sizeof(rtmp_server_key),
776
+                                  digest);
776 777
         if (ret < 0)
777 778
             return ret;
778 779
 
779
-        ret = rtmp_calc_digest(clientdata, RTMP_HANDSHAKE_PACKET_SIZE - 32, 0,
780
-                               digest, 32, digest);
780
+        ret = ff_rtmp_calc_digest(clientdata, RTMP_HANDSHAKE_PACKET_SIZE - 32,
781
+                                  0, digest, 32, signature);
781 782
         if (ret < 0)
782 783
             return ret;
783 784
 
784
-        if (memcmp(digest, clientdata + RTMP_HANDSHAKE_PACKET_SIZE - 32, 32)) {
785
+        if (encrypted) {
786
+            /* Compute the shared secret key sent by the server and initialize
787
+             * the RC4 encryption. */
788
+            if ((ret = ff_rtmpe_compute_secret_key(rt->stream, serverdata + 1,
789
+                                                   tosend + 1, type)) < 0)
790
+                return ret;
791
+
792
+            /* Encrypt the signature received by the server. */
793
+            ff_rtmpe_encrypt_sig(rt->stream, signature, digest, serverdata[0]);
794
+        }
795
+
796
+        if (memcmp(signature, clientdata + RTMP_HANDSHAKE_PACKET_SIZE - 32, 32)) {
785 797
             av_log(s, AV_LOG_ERROR, "Signature mismatch\n");
786 798
             return AVERROR(EIO);
787 799
         }
788 800
 
789 801
         for (i = 0; i < RTMP_HANDSHAKE_PACKET_SIZE; i++)
790 802
             tosend[i] = av_lfg_get(&rnd) >> 24;
791
-        ret = rtmp_calc_digest(serverdata + 1 + server_pos, 32, 0,
792
-                               rtmp_player_key, sizeof(rtmp_player_key),
793
-                               digest);
803
+        ret = ff_rtmp_calc_digest(serverdata + 1 + server_pos, 32, 0,
804
+                                  rtmp_player_key, sizeof(rtmp_player_key),
805
+                                  digest);
794 806
         if (ret < 0)
795 807
             return ret;
796 808
 
797
-        ret = rtmp_calc_digest(tosend, RTMP_HANDSHAKE_PACKET_SIZE - 32, 0,
798
-                               digest, 32,
799
-                               tosend + RTMP_HANDSHAKE_PACKET_SIZE - 32);
809
+        ret = ff_rtmp_calc_digest(tosend, RTMP_HANDSHAKE_PACKET_SIZE - 32, 0,
810
+                                  digest, 32,
811
+                                  tosend + RTMP_HANDSHAKE_PACKET_SIZE - 32);
800 812
         if (ret < 0)
801 813
             return ret;
802 814
 
815
+        if (encrypted) {
816
+            /* Encrypt the signature to be sent to the server. */
817
+            ff_rtmpe_encrypt_sig(rt->stream, tosend +
818
+                                 RTMP_HANDSHAKE_PACKET_SIZE - 32, digest,
819
+                                 serverdata[0]);
820
+        }
821
+
803 822
         // write reply back to the server
804 823
         if ((ret = ffurl_write(rt->stream, tosend,
805 824
                                RTMP_HANDSHAKE_PACKET_SIZE)) < 0)
806 825
             return ret;
826
+
827
+        if (encrypted) {
828
+            /* Set RC4 keys for encryption and update the keystreams. */
829
+            if ((ret = ff_rtmpe_update_keystream(rt->stream)) < 0)
830
+                return ret;
831
+        }
807 832
     } else {
833
+        if (encrypted) {
834
+            /* Compute the shared secret key sent by the server and initialize
835
+             * the RC4 encryption. */
836
+            if ((ret = ff_rtmpe_compute_secret_key(rt->stream, serverdata + 1,
837
+                            tosend + 1, 1)) < 0)
838
+                return ret;
839
+
840
+            if (serverdata[0] == 9) {
841
+                /* Encrypt the signature received by the server. */
842
+                ff_rtmpe_encrypt_sig(rt->stream, signature, digest,
843
+                                     serverdata[0]);
844
+            }
845
+        }
846
+
808 847
         if ((ret = ffurl_write(rt->stream, serverdata + 1,
809 848
                                RTMP_HANDSHAKE_PACKET_SIZE)) < 0)
810 849
             return ret;
850
+
851
+        if (encrypted) {
852
+            /* Set RC4 keys for encryption and update the keystreams. */
853
+            if ((ret = ff_rtmpe_update_keystream(rt->stream)) < 0)
854
+                return ret;
855
+        }
811 856
     }
812 857
 
813 858
     return 0;
... ...
@@ -1130,6 +1192,13 @@ static int rtmp_open(URLContext *s, const char *uri, int flags)
1130 1130
         if (port < 0)
1131 1131
             port = RTMPS_DEFAULT_PORT;
1132 1132
         ff_url_join(buf, sizeof(buf), "tls", NULL, hostname, port, NULL);
1133
+    } else if (!strcmp(proto, "rtmpe") || (!strcmp(proto, "rtmpte"))) {
1134
+        if (!strcmp(proto, "rtmpte"))
1135
+            av_dict_set(&opts, "ffrtmpcrypt_tunneling", "1", 1);
1136
+
1137
+        /* open the encrypted connection */
1138
+        ff_url_join(buf, sizeof(buf), "ffrtmpcrypt", NULL, hostname, port, NULL);
1139
+        rt->encrypted = 1;
1133 1140
     } else {
1134 1141
         /* open the tcp connection */
1135 1142
         if (port < 0)
... ...
@@ -1454,6 +1523,24 @@ URLProtocol ff_rtmp_protocol = {
1454 1454
     .priv_data_class= &rtmp_class,
1455 1455
 };
1456 1456
 
1457
+static const AVClass rtmpe_class = {
1458
+    .class_name = "rtmpe",
1459
+    .item_name  = av_default_item_name,
1460
+    .option     = rtmp_options,
1461
+    .version    = LIBAVUTIL_VERSION_INT,
1462
+};
1463
+
1464
+URLProtocol ff_rtmpe_protocol = {
1465
+    .name            = "rtmpe",
1466
+    .url_open        = rtmp_open,
1467
+    .url_read        = rtmp_read,
1468
+    .url_write       = rtmp_write,
1469
+    .url_close       = rtmp_close,
1470
+    .priv_data_size  = sizeof(RTMPContext),
1471
+    .flags           = URL_PROTOCOL_FLAG_NETWORK,
1472
+    .priv_data_class = &rtmpe_class,
1473
+};
1474
+
1457 1475
 static const AVClass rtmps_class = {
1458 1476
     .class_name = "rtmps",
1459 1477
     .item_name  = av_default_item_name,
... ...
@@ -1490,6 +1577,24 @@ URLProtocol ff_rtmpt_protocol = {
1490 1490
     .priv_data_class = &rtmpt_class,
1491 1491
 };
1492 1492
 
1493
+static const AVClass rtmpte_class = {
1494
+    .class_name = "rtmpte",
1495
+    .item_name  = av_default_item_name,
1496
+    .option     = rtmp_options,
1497
+    .version    = LIBAVUTIL_VERSION_INT,
1498
+};
1499
+
1500
+URLProtocol ff_rtmpte_protocol = {
1501
+    .name            = "rtmpte",
1502
+    .url_open        = rtmp_open,
1503
+    .url_read        = rtmp_read,
1504
+    .url_write       = rtmp_write,
1505
+    .url_close       = rtmp_close,
1506
+    .priv_data_size  = sizeof(RTMPContext),
1507
+    .flags           = URL_PROTOCOL_FLAG_NETWORK,
1508
+    .priv_data_class = &rtmpte_class,
1509
+};
1510
+
1493 1511
 static const AVClass rtmpts_class = {
1494 1512
     .class_name = "rtmpts",
1495 1513
     .item_name  = av_default_item_name,
... ...
@@ -30,7 +30,7 @@
30 30
 #include "libavutil/avutil.h"
31 31
 
32 32
 #define LIBAVFORMAT_VERSION_MAJOR 54
33
-#define LIBAVFORMAT_VERSION_MINOR 19
33
+#define LIBAVFORMAT_VERSION_MINOR 20
34 34
 #define LIBAVFORMAT_VERSION_MICRO 100
35 35
 
36 36
 #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \
... ...
@@ -625,3 +625,15 @@
625 625
     shufps       %1, %1, 0
626 626
 %endif
627 627
 %endmacro
628
+
629
+%macro SHUFFLE_MASK_W 8
630
+    %rep 8
631
+        %if %1>=0x80
632
+            db %1, %1
633
+        %else
634
+            db %1*2
635
+            db %1*2+1
636
+        %endif
637
+        %rotate 1
638
+    %endrep
639
+%endmacro
... ...
@@ -24,6 +24,7 @@
24 24
 #include <stdio.h>
25 25
 #include <string.h>
26 26
 
27
+#include "libavutil/attributes.h"
27 28
 #include "libavutil/avutil.h"
28 29
 #include "libavutil/avassert.h"
29 30
 #include "libavutil/bswap.h"
... ...
@@ -1260,13 +1261,13 @@ YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB,  0)
1260 1260
 YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full,  PIX_FMT_BGR24, 0)
1261 1261
 YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full,  PIX_FMT_RGB24, 0)
1262 1262
 
1263
-void ff_sws_init_output_funcs(SwsContext *c,
1264
-                              yuv2planar1_fn *yuv2plane1,
1265
-                              yuv2planarX_fn *yuv2planeX,
1266
-                              yuv2interleavedX_fn *yuv2nv12cX,
1267
-                              yuv2packed1_fn *yuv2packed1,
1268
-                              yuv2packed2_fn *yuv2packed2,
1269
-                              yuv2packedX_fn *yuv2packedX)
1263
+av_cold void ff_sws_init_output_funcs(SwsContext *c,
1264
+                                      yuv2planar1_fn *yuv2plane1,
1265
+                                      yuv2planarX_fn *yuv2planeX,
1266
+                                      yuv2interleavedX_fn *yuv2nv12cX,
1267
+                                      yuv2packed1_fn *yuv2packed1,
1268
+                                      yuv2packed2_fn *yuv2packed2,
1269
+                                      yuv2packedX_fn *yuv2packedX)
1270 1270
 {
1271 1271
     enum PixelFormat dstFormat = c->dstFormat;
1272 1272
 
... ...
@@ -26,6 +26,7 @@
26 26
 #include "config.h"
27 27
 #include "libswscale/swscale.h"
28 28
 #include "libswscale/swscale_internal.h"
29
+#include "libavutil/attributes.h"
29 30
 #include "libavutil/cpu.h"
30 31
 #include "yuv2rgb_altivec.h"
31 32
 
... ...
@@ -310,7 +311,7 @@ static void hScale_altivec_real(SwsContext *c, int16_t *dst, int dstW,
310 310
         }
311 311
 }
312 312
 
313
-void ff_sws_init_swScale_altivec(SwsContext *c)
313
+av_cold void ff_sws_init_swScale_altivec(SwsContext *c)
314 314
 {
315 315
     enum PixelFormat dstFormat = c->dstFormat;
316 316
 
... ...
@@ -95,6 +95,7 @@
95 95
 #include "libswscale/rgb2rgb.h"
96 96
 #include "libswscale/swscale.h"
97 97
 #include "libswscale/swscale_internal.h"
98
+#include "libavutil/attributes.h"
98 99
 #include "libavutil/cpu.h"
99 100
 #include "libavutil/pixdesc.h"
100 101
 #include "yuv2rgb_altivec.h"
... ...
@@ -531,7 +532,7 @@ static int altivec_uyvy_rgb32(SwsContext *c, const unsigned char **in,
531 531
  *
532 532
  * So we just fall back to the C code for this.
533 533
  */
534
-SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c)
534
+av_cold SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c)
535 535
 {
536 536
     if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
537 537
         return NULL;
... ...
@@ -591,9 +592,11 @@ SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c)
591 591
     return NULL;
592 592
 }
593 593
 
594
-void ff_yuv2rgb_init_tables_altivec(SwsContext *c, const int inv_table[4],
595
-                                    int brightness, int contrast,
596
-                                    int saturation)
594
+av_cold void ff_yuv2rgb_init_tables_altivec(SwsContext *c,
595
+                                            const int inv_table[4],
596
+                                            int brightness,
597
+                                            int contrast,
598
+                                            int saturation)
597 599
 {
598 600
     union {
599 601
         DECLARE_ALIGNED(16, signed short, tmp)[8];
... ...
@@ -25,6 +25,7 @@
25 25
 
26 26
 #include <inttypes.h>
27 27
 
28
+#include "libavutil/attributes.h"
28 29
 #include "libavutil/bswap.h"
29 30
 #include "config.h"
30 31
 #include "rgb2rgb.h"
... ...
@@ -125,7 +126,7 @@ void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
125 125
  * 32-bit C version, and and&add trick by Michael Niedermayer
126 126
  */
127 127
 
128
-void sws_rgb2rgb_init(void)
128
+av_cold void sws_rgb2rgb_init(void)
129 129
 {
130 130
     rgb2rgb_init_c();
131 131
     if (HAVE_MMX)
... ...
@@ -22,6 +22,7 @@
22 22
 #include <inttypes.h>
23 23
 #include <stdlib.h>
24 24
 
25
+#include "libavutil/attributes.h"
25 26
 #include "libswscale/swscale.h"
26 27
 #include "libswscale/swscale_internal.h"
27 28
 
... ...
@@ -184,7 +185,7 @@ static int vis_422P_ARGB32(SwsContext *c, uint8_t *src[], int srcStride[],
184 184
     return srcSliceH;
185 185
 }
186 186
 
187
-SwsFunc ff_yuv2rgb_init_vis(SwsContext *c)
187
+av_cold SwsFunc ff_yuv2rgb_init_vis(SwsContext *c)
188 188
 {
189 189
     c->sparc_coeffs[5] = c->yCoeff;
190 190
     c->sparc_coeffs[6] = c->vgCoeff;
... ...
@@ -544,7 +544,7 @@ static int swScale(SwsContext *c, const uint8_t *src[],
544 544
         if (!enough_lines)
545 545
             break;  // we can't output a dstY line so let's try with the next slice
546 546
 
547
-#if HAVE_MMX
547
+#if HAVE_MMX && HAVE_INLINE_ASM
548 548
         updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex,
549 549
                               lastInLumBuf, lastInChrBuf);
550 550
 #endif
... ...
@@ -37,6 +37,7 @@
37 37
 #include <windows.h>
38 38
 #endif
39 39
 
40
+#include "libavutil/attributes.h"
40 41
 #include "libavutil/avassert.h"
41 42
 #include "libavutil/avutil.h"
42 43
 #include "libavutil/bswap.h"
... ...
@@ -598,7 +599,7 @@ fail:
598 598
     return ret;
599 599
 }
600 600
 
601
-#if HAVE_MMX2
601
+#if HAVE_MMX2 && HAVE_INLINE_ASM
602 602
 static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode,
603 603
                            int16_t *filter, int32_t *filterPos, int numSplits)
604 604
 {
... ...
@@ -761,7 +762,7 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode,
761 761
 
762 762
     return fragmentPos + 1;
763 763
 }
764
-#endif /* HAVE_MMX2 */
764
+#endif /* HAVE_MMX2 && HAVE_INLINE_ASM */
765 765
 
766 766
 static void getSubSampleFactors(int *h, int *v, enum PixelFormat format)
767 767
 {
... ...
@@ -856,7 +857,8 @@ SwsContext *sws_alloc_context(void)
856 856
     return c;
857 857
 }
858 858
 
859
-int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
859
+av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
860
+                             SwsFilter *dstFilter)
860 861
 {
861 862
     int i, j;
862 863
     int usesVFilter, usesHFilter;
... ...
@@ -1022,7 +1024,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
1022 1022
         c->srcBpc = 16;
1023 1023
     if (c->dstBpc == 16)
1024 1024
         dst_stride <<= 1;
1025
-    if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2 &&
1025
+    if (HAVE_MMX2 && HAVE_INLINE_ASM && cpu_flags & AV_CPU_FLAG_MMX2 &&
1026 1026
         c->srcBpc == 8 && c->dstBpc <= 14) {
1027 1027
         c->canMMX2BeUsed = (dstW >= srcW && (dstW & 31) == 0 &&
1028 1028
                             (srcW & 15) == 0) ? 1 : 0;
... ...
@@ -1061,7 +1063,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
1061 1061
 
1062 1062
     /* precalculate horizontal scaler filter coefficients */
1063 1063
     {
1064
-#if HAVE_MMX2
1064
+#if HAVE_MMX2 && HAVE_INLINE_ASM
1065 1065
 // can't downscale !!!
1066 1066
         if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR)) {
1067 1067
             c->lumMmx2FilterCodeSize = initMMX2HScaler(dstW, c->lumXInc, NULL,
... ...
@@ -1105,7 +1107,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
1105 1105
             mprotect(c->chrMmx2FilterCode, c->chrMmx2FilterCodeSize, PROT_EXEC | PROT_READ);
1106 1106
 #endif
1107 1107
         } else
1108
-#endif /* HAVE_MMX2 */
1108
+#endif /* HAVE_MMX2 && HAVE_INLINE_ASM */
1109 1109
         {
1110 1110
             const int filterAlign =
1111 1111
                 (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? 4 :
... ...
@@ -3,8 +3,8 @@ $(SUBDIR)x86/swscale_mmx.o: CFLAGS += $(NOREDZONE_FLAGS)
3 3
 OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o
4 4
 
5 5
 MMX-OBJS                        += x86/rgb2rgb.o                        \
6
-                                   x86/swscale_mmx.o                    \
7
-                                   x86/yuv2rgb_mmx.o                    \
6
+                                   x86/swscale.o                        \
7
+                                   x86/yuv2rgb.o                        \
8 8
 
9 9
 YASM-OBJS                       += x86/input.o                          \
10 10
                                    x86/output.o                         \
... ...
@@ -26,6 +26,7 @@
26 26
 #include <stdint.h>
27 27
 
28 28
 #include "config.h"
29
+#include "libavutil/attributes.h"
29 30
 #include "libavutil/x86_cpu.h"
30 31
 #include "libavutil/cpu.h"
31 32
 #include "libavutil/bswap.h"
... ...
@@ -130,7 +131,7 @@ DECLARE_ASM_CONST(8, uint64_t, mul16_mid)    = 0x2080208020802080ULL;
130 130
 
131 131
 #endif /* HAVE_INLINE_ASM */
132 132
 
133
-void rgb2rgb_init_x86(void)
133
+av_cold void rgb2rgb_init_x86(void)
134 134
 {
135 135
 #if HAVE_INLINE_ASM
136 136
     int cpu_flags = av_get_cpu_flags();
137 137
new file mode 100644
... ...
@@ -0,0 +1,573 @@
0
+/*
1
+ * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
2
+ *
3
+ * This file is part of FFmpeg.
4
+ *
5
+ * FFmpeg is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * FFmpeg is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with FFmpeg; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#include <inttypes.h>
21
+#include "config.h"
22
+#include "libswscale/swscale.h"
23
+#include "libswscale/swscale_internal.h"
24
+#include "libavutil/attributes.h"
25
+#include "libavutil/avassert.h"
26
+#include "libavutil/intreadwrite.h"
27
+#include "libavutil/x86_cpu.h"
28
+#include "libavutil/cpu.h"
29
+#include "libavutil/pixdesc.h"
30
+
31
+#if HAVE_INLINE_ASM
32
+
33
+#define DITHER1XBPP
34
+
35
+DECLARE_ASM_CONST(8, uint64_t, bF8)=       0xF8F8F8F8F8F8F8F8LL;
36
+DECLARE_ASM_CONST(8, uint64_t, bFC)=       0xFCFCFCFCFCFCFCFCLL;
37
+DECLARE_ASM_CONST(8, uint64_t, w10)=       0x0010001000100010LL;
38
+DECLARE_ASM_CONST(8, uint64_t, w02)=       0x0002000200020002LL;
39
+
40
+const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
41
+    0x0103010301030103LL,
42
+    0x0200020002000200LL,};
43
+
44
+const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
45
+    0x0602060206020602LL,
46
+    0x0004000400040004LL,};
47
+
48
+DECLARE_ASM_CONST(8, uint64_t, b16Mask)=   0x001F001F001F001FLL;
49
+DECLARE_ASM_CONST(8, uint64_t, g16Mask)=   0x07E007E007E007E0LL;
50
+DECLARE_ASM_CONST(8, uint64_t, r16Mask)=   0xF800F800F800F800LL;
51
+DECLARE_ASM_CONST(8, uint64_t, b15Mask)=   0x001F001F001F001FLL;
52
+DECLARE_ASM_CONST(8, uint64_t, g15Mask)=   0x03E003E003E003E0LL;
53
+DECLARE_ASM_CONST(8, uint64_t, r15Mask)=   0x7C007C007C007C00LL;
54
+
55
+DECLARE_ALIGNED(8, const uint64_t, ff_M24A)         = 0x00FF0000FF0000FFLL;
56
+DECLARE_ALIGNED(8, const uint64_t, ff_M24B)         = 0xFF0000FF0000FF00LL;
57
+DECLARE_ALIGNED(8, const uint64_t, ff_M24C)         = 0x0000FF0000FF0000LL;
58
+
59
+#ifdef FAST_BGR2YV12
60
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000000210041000DULL;
61
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000FFEEFFDC0038ULL;
62
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00000038FFD2FFF8ULL;
63
+#else
64
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000020E540830C8BULL;
65
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000ED0FDAC23831ULL;
66
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00003831D0E6F6EAULL;
67
+#endif /* FAST_BGR2YV12 */
68
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset)  = 0x1010101010101010ULL;
69
+DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
70
+DECLARE_ALIGNED(8, const uint64_t, ff_w1111)        = 0x0001000100010001ULL;
71
+
72
+
73
+//MMX versions
74
+#if HAVE_MMX
75
+#undef RENAME
76
+#define COMPILE_TEMPLATE_MMX2 0
77
+#define RENAME(a) a ## _MMX
78
+#include "swscale_template.c"
79
+#endif
80
+
81
+//MMX2 versions
82
+#if HAVE_MMX2
83
+#undef RENAME
84
+#undef COMPILE_TEMPLATE_MMX2
85
+#define COMPILE_TEMPLATE_MMX2 1
86
+#define RENAME(a) a ## _MMX2
87
+#include "swscale_template.c"
88
+#endif
89
+
90
+void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex,
91
+                           int lastInLumBuf, int lastInChrBuf)
92
+{
93
+    const int dstH= c->dstH;
94
+    const int flags= c->flags;
95
+    int16_t **lumPixBuf= c->lumPixBuf;
96
+    int16_t **chrUPixBuf= c->chrUPixBuf;
97
+    int16_t **alpPixBuf= c->alpPixBuf;
98
+    const int vLumBufSize= c->vLumBufSize;
99
+    const int vChrBufSize= c->vChrBufSize;
100
+    int32_t *vLumFilterPos= c->vLumFilterPos;
101
+    int32_t *vChrFilterPos= c->vChrFilterPos;
102
+    int16_t *vLumFilter= c->vLumFilter;
103
+    int16_t *vChrFilter= c->vChrFilter;
104
+    int32_t *lumMmxFilter= c->lumMmxFilter;
105
+    int32_t *chrMmxFilter= c->chrMmxFilter;
106
+    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
107
+    const int vLumFilterSize= c->vLumFilterSize;
108
+    const int vChrFilterSize= c->vChrFilterSize;
109
+    const int chrDstY= dstY>>c->chrDstVSubSample;
110
+    const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
111
+    const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
112
+
113
+    c->blueDither= ff_dither8[dstY&1];
114
+    if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555)
115
+        c->greenDither= ff_dither8[dstY&1];
116
+    else
117
+        c->greenDither= ff_dither4[dstY&1];
118
+    c->redDither= ff_dither8[(dstY+1)&1];
119
+    if (dstY < dstH - 2) {
120
+        const int16_t **lumSrcPtr= (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
121
+        const int16_t **chrUSrcPtr= (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
122
+        const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
123
+        int i;
124
+
125
+        if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
126
+            const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize;
127
+            int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
128
+            for (i = 0; i < neg;            i++)
129
+                tmpY[i] = lumSrcPtr[neg];
130
+            for (     ; i < end;            i++)
131
+                tmpY[i] = lumSrcPtr[i];
132
+            for (     ; i < vLumFilterSize; i++)
133
+                tmpY[i] = tmpY[i-1];
134
+            lumSrcPtr = tmpY;
135
+
136
+            if (alpSrcPtr) {
137
+                const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize;
138
+                for (i = 0; i < neg;            i++)
139
+                    tmpA[i] = alpSrcPtr[neg];
140
+                for (     ; i < end;            i++)
141
+                    tmpA[i] = alpSrcPtr[i];
142
+                for (     ; i < vLumFilterSize; i++)
143
+                    tmpA[i] = tmpA[i - 1];
144
+                alpSrcPtr = tmpA;
145
+            }
146
+        }
147
+        if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
148
+            const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize;
149
+            int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
150
+            for (i = 0; i < neg;            i++) {
151
+                tmpU[i] = chrUSrcPtr[neg];
152
+            }
153
+            for (     ; i < end;            i++) {
154
+                tmpU[i] = chrUSrcPtr[i];
155
+            }
156
+            for (     ; i < vChrFilterSize; i++) {
157
+                tmpU[i] = tmpU[i - 1];
158
+            }
159
+            chrUSrcPtr = tmpU;
160
+        }
161
+
162
+        if (flags & SWS_ACCURATE_RND) {
163
+            int s= APCK_SIZE / 8;
164
+            for (i=0; i<vLumFilterSize; i+=2) {
165
+                *(const void**)&lumMmxFilter[s*i              ]= lumSrcPtr[i  ];
166
+                *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4  ]= lumSrcPtr[i+(vLumFilterSize>1)];
167
+                lumMmxFilter[s*i+APCK_COEF/4  ]=
168
+                lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i    ]
169
+                + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
170
+                if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
171
+                    *(const void**)&alpMmxFilter[s*i              ]= alpSrcPtr[i  ];
172
+                    *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4  ]= alpSrcPtr[i+(vLumFilterSize>1)];
173
+                    alpMmxFilter[s*i+APCK_COEF/4  ]=
174
+                    alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4  ];
175
+                }
176
+            }
177
+            for (i=0; i<vChrFilterSize; i+=2) {
178
+                *(const void**)&chrMmxFilter[s*i              ]= chrUSrcPtr[i  ];
179
+                *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4  ]= chrUSrcPtr[i+(vChrFilterSize>1)];
180
+                chrMmxFilter[s*i+APCK_COEF/4  ]=
181
+                chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i    ]
182
+                + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
183
+            }
184
+        } else {
185
+            for (i=0; i<vLumFilterSize; i++) {
186
+                *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
187
+                lumMmxFilter[4*i+2]=
188
+                lumMmxFilter[4*i+3]=
189
+                ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
190
+                if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
191
+                    *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
192
+                    alpMmxFilter[4*i+2]=
193
+                    alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
194
+                }
195
+            }
196
+            for (i=0; i<vChrFilterSize; i++) {
197
+                *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
198
+                chrMmxFilter[4*i+2]=
199
+                chrMmxFilter[4*i+3]=
200
+                ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
201
+            }
202
+        }
203
+    }
204
+}
205
+
206
+#if HAVE_MMX2
207
+static void yuv2yuvX_sse3(const int16_t *filter, int filterSize, /* SSE3 vertical filter producing 8-bit output, 16 pixels per store */
208
+                           const int16_t **src, uint8_t *dest, int dstW, /* filterSize and src are only forwarded to the MMX2 fallback */
209
+                           const uint8_t *dither, int offset) /* 8 dither bytes are read; offset is the initial loop counter */
210
+{
211
+    if (((uintptr_t)dest) & 15) { /* test alignment on the full pointer width; movntdq below needs a 16-byte aligned address */
212
+        return yuv2yuvX_MMX2(filter, filterSize, src, dest, dstW, dither, offset);
213
+    }
214
+    if (offset) {
215
+        __asm__ volatile("movq       (%0), %%xmm3\n\t" /* xmm3 = 8 dither bytes */
216
+                         "movdqa    %%xmm3, %%xmm4\n\t"
217
+                         "psrlq       $24, %%xmm3\n\t"
218
+                         "psllq       $40, %%xmm4\n\t"
219
+                         "por       %%xmm4, %%xmm3\n\t" /* together: rotate the 64-bit dither value right by 24 bits */
220
+                         :: "r"(dither)
221
+                         );
222
+    } else {
223
+        __asm__ volatile("movq       (%0), %%xmm3\n\t" /* xmm3 = 8 dither bytes, unrotated */
224
+                         :: "r"(dither)
225
+                         );
226
+    }
227
+    __asm__ volatile( /* NOTE(review): relies on xmm3 still holding the value loaded by the asm statement above */
228
+        "pxor      %%xmm0, %%xmm0\n\t" /* xmm0 = 0 for the unpack */
229
+        "punpcklbw %%xmm0, %%xmm3\n\t" /* zero-extend the 8 dither bytes to 16-bit words */
230
+        "psraw        $4, %%xmm3\n\t" /* dither >> 4 */
231
+        "movdqa    %%xmm3, %%xmm4\n\t" /* both accumulators start from the dither words */
232
+        "movdqa    %%xmm3, %%xmm7\n\t" /* xmm7 = copy used to re-seed the accumulators */
233
+        "movl %3, %%ecx\n\t" /* loop counter starts at offset */
234
+        "mov                                 %0, %%"REG_d"  \n\t"\
235
+        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
236
+        ".p2align                             4             \n\t" /* FIXME Unroll? */\
237
+        "1:                                                 \n\t"\
238
+        "movddup                  8(%%"REG_d"), %%xmm0      \n\t" /* filterCoeff */\
239
+        "movdqa              (%%"REG_S", %%"REG_c", 2), %%xmm2      \n\t" /* srcData */\
240
+        "movdqa            16(%%"REG_S", %%"REG_c", 2), %%xmm5      \n\t" /* srcData */\
241
+        "add                                $16, %%"REG_d"  \n\t"\
242
+        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
243
+        "test                         %%"REG_S", %%"REG_S"  \n\t"\
244
+        "pmulhw                           %%xmm0, %%xmm2      \n\t"\
245
+        "pmulhw                           %%xmm0, %%xmm5      \n\t"\
246
+        "paddw                            %%xmm2, %%xmm3      \n\t"\
247
+        "paddw                            %%xmm5, %%xmm4      \n\t"\
248
+        " jnz                                1b             \n\t"\
249
+        "psraw                               $3, %%xmm3      \n\t"\
250
+        "psraw                               $3, %%xmm4      \n\t"\
251
+        "packuswb                         %%xmm4, %%xmm3      \n\t" /* saturate the 16 words to 16 unsigned bytes */
252
+        "movntdq                          %%xmm3, (%1, %%"REG_c")\n\t" /* non-temporal 16-byte store at (dest-offset)+counter */
253
+        "add                         $16, %%"REG_c"         \n\t"\
254
+        "cmp                          %2, %%"REG_c"         \n\t"\
255
+        "movdqa    %%xmm7, %%xmm3\n\t" /* re-seed both accumulators for the next 16 pixels */
256
+        "movdqa    %%xmm7, %%xmm4\n\t"
257
+        "mov                                 %0, %%"REG_d"  \n\t"\
258
+        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
259
+        "jb                                  1b             \n\t"\
260
+        :: "g" (filter), /* %0: 16-byte entries {source line pointer, coefficient at +8}, walked until a NULL pointer */
261
+           "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset) /* %1 store base, %2 loop bound, %3 start counter */
262
+        : "%"REG_d, "%"REG_S, "%"REG_c /* NOTE(review): xmm0/2/3/4/5/7 are modified but not listed as clobbers */
263
+    );
264
+}
265
+#endif
266
+
267
+#endif /* HAVE_INLINE_ASM */
268
+
269
+#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
270
+extern void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
271
+                                                SwsContext *c, int16_t *data, \
272
+                                                int dstW, const uint8_t *src, \
273
+                                                const int16_t *filter, \
274
+                                                const int32_t *filterPos, int filterSize) /* prototype of one extern ff_hscale<from>to<to>_<filter_n>_<opt> routine */
275
+
276
+#define SCALE_FUNCS(filter_n, opt) \
277
+    SCALE_FUNC(filter_n,  8, 15, opt); \
278
+    SCALE_FUNC(filter_n,  9, 15, opt); \
279
+    SCALE_FUNC(filter_n, 10, 15, opt); \
280
+    SCALE_FUNC(filter_n, 12, 15, opt); \
281
+    SCALE_FUNC(filter_n, 14, 15, opt); \
282
+    SCALE_FUNC(filter_n, 16, 15, opt); \
283
+    SCALE_FUNC(filter_n,  8, 19, opt); \
284
+    SCALE_FUNC(filter_n,  9, 19, opt); \
285
+    SCALE_FUNC(filter_n, 10, 19, opt); \
286
+    SCALE_FUNC(filter_n, 12, 19, opt); \
287
+    SCALE_FUNC(filter_n, 14, 19, opt); \
288
+    SCALE_FUNC(filter_n, 16, 19, opt) /* 12 prototypes: from_bpc in {8,9,10,12,14,16} x to_bpc in {15,19} */
289
+
290
+#define SCALE_FUNCS_MMX(opt) \
291
+    SCALE_FUNCS(4, opt); \
292
+    SCALE_FUNCS(8, opt); \
293
+    SCALE_FUNCS(X, opt) /* MMX set: filter_n variants 4, 8 and X */
294
+
295
+#define SCALE_FUNCS_SSE(opt) \
296
+    SCALE_FUNCS(4, opt); \
297
+    SCALE_FUNCS(8, opt); \
298
+    SCALE_FUNCS(X4, opt); \
299
+    SCALE_FUNCS(X8, opt) /* SSE set: filter_n variants 4, 8, X4 and X8 */
300
+
301
+#if ARCH_X86_32
302
+SCALE_FUNCS_MMX(mmx); /* the mmx prototypes are only declared for 32-bit x86 builds */
303
+#endif
304
+SCALE_FUNCS_SSE(sse2);
305
+SCALE_FUNCS_SSE(ssse3);
306
+SCALE_FUNCS_SSE(sse4);
307
+
308
+#define VSCALEX_FUNC(size, opt) \
309
+extern void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
310
+                                               const int16_t **src, uint8_t *dest, int dstW, \
311
+                                               const uint8_t *dither, int offset) /* prototype of one extern ff_yuv2planeX_<size>_<opt> routine */
312
+#define VSCALEX_FUNCS(opt) \
313
+    VSCALEX_FUNC(8,  opt); \
314
+    VSCALEX_FUNC(9,  opt); \
315
+    VSCALEX_FUNC(10, opt) /* sizes 8, 9 and 10 for one instruction set */
316
+
317
+#if ARCH_X86_32
318
+VSCALEX_FUNCS(mmx2); /* the mmx2 prototypes are only declared for 32-bit x86 builds */
319
+#endif
320
+VSCALEX_FUNCS(sse2);
321
+VSCALEX_FUNCS(sse4);
322
+VSCALEX_FUNC(16, sse4); /* size 16 is declared for sse4 only */
323
+VSCALEX_FUNCS(avx);
324
+
325
+#define VSCALE_FUNC(size, opt) \
326
+extern void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \
327
+                                               const uint8_t *dither, int offset) /* prototype of one extern ff_yuv2plane1_<size>_<opt> routine */
328
+#define VSCALE_FUNCS(opt1, opt2) \
329
+    VSCALE_FUNC(8,  opt1); \
330
+    VSCALE_FUNC(9,  opt2); \
331
+    VSCALE_FUNC(10, opt2); \
332
+    VSCALE_FUNC(16, opt1) /* sizes 8 and 16 use opt1, sizes 9 and 10 use opt2 */
333
+
334
+#if ARCH_X86_32
335
+VSCALE_FUNCS(mmx, mmx2); /* 8/16 as mmx, 9/10 as mmx2; only declared for 32-bit x86 builds */
336
+#endif
337
+VSCALE_FUNCS(sse2, sse2);
338
+VSCALE_FUNC(16, sse4); /* additional size-16 variant for sse4 */
339
+VSCALE_FUNCS(avx, avx);
340
+
341
+#define INPUT_Y_FUNC(fmt, opt) \
342
+extern void ff_ ## fmt ## ToY_  ## opt(uint8_t *dst, const uint8_t *src, \
343
+                                       int w, uint32_t *unused) /* prototype of one extern ff_<fmt>ToY_<opt> routine */
344
+#define INPUT_UV_FUNC(fmt, opt) \
345
+extern void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
346
+                                       const uint8_t *src, const uint8_t *unused1, \
347
+                                       int w, uint32_t *unused2) /* prototype of one extern ff_<fmt>ToUV_<opt> routine */
348
+#define INPUT_FUNC(fmt, opt) \
349
+    INPUT_Y_FUNC(fmt, opt); \
350
+    INPUT_UV_FUNC(fmt, opt) /* both the ToY and the ToUV prototype for one format */
351
+#define INPUT_FUNCS(opt) \
352
+    INPUT_FUNC(uyvy, opt); \
353
+    INPUT_FUNC(yuyv, opt); \
354
+    INPUT_UV_FUNC(nv12, opt); \
355
+    INPUT_UV_FUNC(nv21, opt); \
356
+    INPUT_FUNC(rgba, opt); \
357
+    INPUT_FUNC(bgra, opt); \
358
+    INPUT_FUNC(argb, opt); \
359
+    INPUT_FUNC(abgr, opt); \
360
+    INPUT_FUNC(rgb24, opt); \
361
+    INPUT_FUNC(bgr24, opt) /* nv12 and nv21 get only a ToUV prototype; every other format gets ToY and ToUV */
362
+
363
+#if ARCH_X86_32
364
+INPUT_FUNCS(mmx); /* the mmx prototypes are only declared for 32-bit x86 builds */
365
+#endif
366
+INPUT_FUNCS(sse2);
367
+INPUT_FUNCS(ssse3);
368
+INPUT_FUNCS(avx);
369
+
370
+av_cold void ff_sws_init_swScale_mmx(SwsContext *c) /* installs x86-specific function pointers into c according to av_get_cpu_flags() */
371
+{
372
+    int cpu_flags = av_get_cpu_flags();
373
+
374
+#if HAVE_INLINE_ASM
375
+    if (cpu_flags & AV_CPU_FLAG_MMX)
376
+        sws_init_swScale_MMX(c);
377
+#if HAVE_MMX2
378
+    if (cpu_flags & AV_CPU_FLAG_MMX2)
379
+        sws_init_swScale_MMX2(c);
380
+    if (cpu_flags & AV_CPU_FLAG_SSE3){
381
+        if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) /* the SSE3 routine is only installed with use_mmx_vfilter set and SWS_ACCURATE_RND clear */
382
+            c->yuv2planeX = yuv2yuvX_sse3;
383
+    }
384
+#endif
385
+#endif /* HAVE_INLINE_ASM */
386
+
387
+#if HAVE_YASM /* the rest of the function assigns the extern ff_* routines; later CPU checks overwrite pointers set by earlier ones */
388
+#define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
389
+    if (c->srcBpc == 8) { \
390
+        hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
391
+                                     ff_hscale8to19_ ## filtersize ## _ ## opt1; \
392
+    } else if (c->srcBpc == 9) { \
393
+        hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
394
+                                     ff_hscale9to19_ ## filtersize ## _ ## opt1; \
395
+    } else if (c->srcBpc == 10) { \
396
+        hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
397
+                                     ff_hscale10to19_ ## filtersize ## _ ## opt1; \
398
+    } else if (c->srcBpc == 12) { \
399
+        hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \
400
+                                     ff_hscale12to19_ ## filtersize ## _ ## opt1; \
401
+    } else if (c->srcBpc == 14 || ((c->srcFormat==PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)) { \
402
+        hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \
403
+                                     ff_hscale14to19_ ## filtersize ## _ ## opt1; \
404
+    } else { /* c->srcBpc == 16 */ \
405
+        av_assert0(c->srcBpc == 16);\
406
+        hscalefn = c->dstBpc <= 14 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
407
+                                     ff_hscale16to19_ ## filtersize ## _ ## opt1; \
408
+    } \
409
+} while (0)
410
+#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
411
+    switch (filtersize) { \
412
+    case 4:  ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
413
+    case 8:  ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
414
+    default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \
415
+    }
416
+#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \
417
+switch(c->dstBpc){ \
418
+    case 16:                          do_16_case;                          break; \
419
+    case 10: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
420
+    case 9:  if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_  ## opt; break; \
421
+    default: if (condition_8bit)    /*vscalefn = ff_yuv2planeX_8_  ## opt;*/ break; \
422
+    }
423
+#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \
424
+    switch(c->dstBpc){ \
425
+    case 16: if (!isBE(c->dstFormat))            vscalefn = ff_yuv2plane1_16_ ## opt1; break; \
426
+    case 10: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \
427
+    case 9:  if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_  ## opt2;  break; \
428
+    case 8:                                      vscalefn = ff_yuv2plane1_8_  ## opt1;  break; \
429
+    default: av_assert0(c->dstBpc>8); \
430
+    }
431
+#define case_rgb(x, X, opt) \
432
+        case PIX_FMT_ ## X: \
433
+            c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \
434
+            if (!c->chrSrcHSubSample) \
435
+                c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
436
+            break
437
+#if ARCH_X86_32 /* the MMX/MMX2 ff_* routines are only used on 32-bit x86 */
438
+    if (cpu_flags & AV_CPU_FLAG_MMX) {
439
+        ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
440
+        ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
441
+        ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmx2, cpu_flags & AV_CPU_FLAG_MMX2);
442
+
443
+        switch (c->srcFormat) {
444
+        case PIX_FMT_Y400A: /* Y400A takes luma via the yuyv ToY reader and alpha via the uyvy ToY reader */
445
+            c->lumToYV12 = ff_yuyvToY_mmx;
446
+            if (c->alpPixBuf)
447
+                c->alpToYV12 = ff_uyvyToY_mmx;
448
+            break;
449
+        case PIX_FMT_YUYV422:
450
+            c->lumToYV12 = ff_yuyvToY_mmx;
451
+            c->chrToYV12 = ff_yuyvToUV_mmx;
452
+            break;
453
+        case PIX_FMT_UYVY422:
454
+            c->lumToYV12 = ff_uyvyToY_mmx;
455
+            c->chrToYV12 = ff_uyvyToUV_mmx;
456
+            break;
457
+        case PIX_FMT_NV12:
458
+            c->chrToYV12 = ff_nv12ToUV_mmx;
459
+            break;
460
+        case PIX_FMT_NV21:
461
+            c->chrToYV12 = ff_nv21ToUV_mmx;
462
+            break;
463
+        case_rgb(rgb24, RGB24, mmx);
464
+        case_rgb(bgr24, BGR24, mmx);
465
+        case_rgb(bgra,  BGRA,  mmx);
466
+        case_rgb(rgba,  RGBA,  mmx);
467
+        case_rgb(abgr,  ABGR,  mmx);
468
+        case_rgb(argb,  ARGB,  mmx);
469
+        default:
470
+            break;
471
+        }
472
+    }
473
+    if (cpu_flags & AV_CPU_FLAG_MMX2) {
474
+        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmx2, , 1);
475
+    }
476
+#endif
477
+#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
478
+    switch (filtersize) { \
479
+    case 4:  ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
480
+    case 8:  ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
481
+    default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
482
+             else                ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
483
+             break; \
484
+    }
485
+    if (cpu_flags & AV_CPU_FLAG_SSE2) {
486
+        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
487
+        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
488
+        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, ,
489
+                            HAVE_ALIGNED_STACK || ARCH_X86_64);
490
+        ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1);
491
+
492
+        switch (c->srcFormat) {
493
+        case PIX_FMT_Y400A: /* same reader pairing as the MMX case: yuyv ToY for luma, uyvy ToY for alpha */
494
+            c->lumToYV12 = ff_yuyvToY_sse2;
495
+            if (c->alpPixBuf)
496
+                c->alpToYV12 = ff_uyvyToY_sse2;
497
+            break;
498
+        case PIX_FMT_YUYV422:
499
+            c->lumToYV12 = ff_yuyvToY_sse2;
500
+            c->chrToYV12 = ff_yuyvToUV_sse2;
501
+            break;
502
+        case PIX_FMT_UYVY422:
503
+            c->lumToYV12 = ff_uyvyToY_sse2;
504
+            c->chrToYV12 = ff_uyvyToUV_sse2;
505
+            break;
506
+        case PIX_FMT_NV12:
507
+            c->chrToYV12 = ff_nv12ToUV_sse2;
508
+            break;
509
+        case PIX_FMT_NV21:
510
+            c->chrToYV12 = ff_nv21ToUV_sse2;
511
+            break;
512
+        case_rgb(rgb24, RGB24, sse2);
513
+        case_rgb(bgr24, BGR24, sse2);
514
+        case_rgb(bgra,  BGRA,  sse2);
515
+        case_rgb(rgba,  RGBA,  sse2);
516
+        case_rgb(abgr,  ABGR,  sse2);
517
+        case_rgb(argb,  ARGB,  sse2);
518
+        default:
519
+            break;
520
+        }
521
+    }
522
+    if (cpu_flags & AV_CPU_FLAG_SSSE3) {
523
+        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
524
+        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
525
+        switch (c->srcFormat) { /* only the rgb24 and bgr24 readers are replaced with ssse3 versions here */
526
+        case_rgb(rgb24, RGB24, ssse3);
527
+        case_rgb(bgr24, BGR24, ssse3);
528
+        default:
529
+            break;
530
+        }
531
+    }
532
+    if (cpu_flags & AV_CPU_FLAG_SSE4) {
533
+        /* Xto15 don't need special sse4 functions */
534
+        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
535
+        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
536
+        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4,
537
+                            if (!isBE(c->dstFormat)) c->yuv2planeX = ff_yuv2planeX_16_sse4,
538
+                            HAVE_ALIGNED_STACK || ARCH_X86_64);
539
+        if (c->dstBpc == 16 && !isBE(c->dstFormat))
540
+            c->yuv2plane1 = ff_yuv2plane1_16_sse4;
541
+    }
542
+
543
+    if (HAVE_AVX && cpu_flags & AV_CPU_FLAG_AVX) { /* needs both the build-time HAVE_AVX and the runtime CPU flag */
544
+        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, ,
545
+                            HAVE_ALIGNED_STACK || ARCH_X86_64);
546
+        ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1);
547
+
548
+        switch (c->srcFormat) {
549
+        case PIX_FMT_YUYV422: /* for yuyv/uyvy only chrToYV12 is replaced in the AVX block */
550
+            c->chrToYV12 = ff_yuyvToUV_avx;
551
+            break;
552
+        case PIX_FMT_UYVY422:
553
+            c->chrToYV12 = ff_uyvyToUV_avx;
554
+            break;
555
+        case PIX_FMT_NV12:
556
+            c->chrToYV12 = ff_nv12ToUV_avx;
557
+            break;
558
+        case PIX_FMT_NV21:
559
+            c->chrToYV12 = ff_nv21ToUV_avx;
560
+            break;
561
+        case_rgb(rgb24, RGB24, avx);
562
+        case_rgb(bgr24, BGR24, avx);
563
+        case_rgb(bgra,  BGRA,  avx);
564
+        case_rgb(rgba,  RGBA,  avx);
565
+        case_rgb(abgr,  ABGR,  avx);
566
+        case_rgb(argb,  ARGB,  avx);
567
+        default:
568
+            break;
569
+        }
570
+    }
571
+#endif
572
+}
0 573
deleted file mode 100644
... ...
@@ -1,572 +0,0 @@
1
-/*
2
- * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
3
- *
4
- * This file is part of FFmpeg.
5
- *
6
- * FFmpeg is free software; you can redistribute it and/or
7
- * modify it under the terms of the GNU Lesser General Public
8
- * License as published by the Free Software Foundation; either
9
- * version 2.1 of the License, or (at your option) any later version.
10
- *
11
- * FFmpeg is distributed in the hope that it will be useful,
12
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
- * Lesser General Public License for more details.
15
- *
16
- * You should have received a copy of the GNU Lesser General Public
17
- * License along with FFmpeg; if not, write to the Free Software
18
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
- */
20
-
21
-#include <inttypes.h>
22
-#include "config.h"
23
-#include "libswscale/swscale.h"
24
-#include "libswscale/swscale_internal.h"
25
-#include "libavutil/avassert.h"
26
-#include "libavutil/intreadwrite.h"
27
-#include "libavutil/x86_cpu.h"
28
-#include "libavutil/cpu.h"
29
-#include "libavutil/pixdesc.h"
30
-
31
-#if HAVE_INLINE_ASM
32
-
33
-#define DITHER1XBPP
34
-
35
-DECLARE_ASM_CONST(8, uint64_t, bF8)=       0xF8F8F8F8F8F8F8F8LL;
36
-DECLARE_ASM_CONST(8, uint64_t, bFC)=       0xFCFCFCFCFCFCFCFCLL;
37
-DECLARE_ASM_CONST(8, uint64_t, w10)=       0x0010001000100010LL;
38
-DECLARE_ASM_CONST(8, uint64_t, w02)=       0x0002000200020002LL;
39
-
40
-const DECLARE_ALIGNED(8, uint64_t, ff_dither4)[2] = {
41
-    0x0103010301030103LL,
42
-    0x0200020002000200LL,};
43
-
44
-const DECLARE_ALIGNED(8, uint64_t, ff_dither8)[2] = {
45
-    0x0602060206020602LL,
46
-    0x0004000400040004LL,};
47
-
48
-DECLARE_ASM_CONST(8, uint64_t, b16Mask)=   0x001F001F001F001FLL;
49
-DECLARE_ASM_CONST(8, uint64_t, g16Mask)=   0x07E007E007E007E0LL;
50
-DECLARE_ASM_CONST(8, uint64_t, r16Mask)=   0xF800F800F800F800LL;
51
-DECLARE_ASM_CONST(8, uint64_t, b15Mask)=   0x001F001F001F001FLL;
52
-DECLARE_ASM_CONST(8, uint64_t, g15Mask)=   0x03E003E003E003E0LL;
53
-DECLARE_ASM_CONST(8, uint64_t, r15Mask)=   0x7C007C007C007C00LL;
54
-
55
-DECLARE_ALIGNED(8, const uint64_t, ff_M24A)         = 0x00FF0000FF0000FFLL;
56
-DECLARE_ALIGNED(8, const uint64_t, ff_M24B)         = 0xFF0000FF0000FF00LL;
57
-DECLARE_ALIGNED(8, const uint64_t, ff_M24C)         = 0x0000FF0000FF0000LL;
58
-
59
-#ifdef FAST_BGR2YV12
60
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000000210041000DULL;
61
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000FFEEFFDC0038ULL;
62
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00000038FFD2FFF8ULL;
63
-#else
64
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000020E540830C8BULL;
65
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000ED0FDAC23831ULL;
66
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00003831D0E6F6EAULL;
67
-#endif /* FAST_BGR2YV12 */
68
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset)  = 0x1010101010101010ULL;
69
-DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
70
-DECLARE_ALIGNED(8, const uint64_t, ff_w1111)        = 0x0001000100010001ULL;
71
-
72
-
73
-//MMX versions
74
-#if HAVE_MMX
75
-#undef RENAME
76
-#define COMPILE_TEMPLATE_MMX2 0
77
-#define RENAME(a) a ## _MMX
78
-#include "swscale_template.c"
79
-#endif
80
-
81
-//MMX2 versions
82
-#if HAVE_MMX2
83
-#undef RENAME
84
-#undef COMPILE_TEMPLATE_MMX2
85
-#define COMPILE_TEMPLATE_MMX2 1
86
-#define RENAME(a) a ## _MMX2
87
-#include "swscale_template.c"
88
-#endif
89
-
90
-void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex,
91
-                           int lastInLumBuf, int lastInChrBuf)
92
-{
93
-    const int dstH= c->dstH;
94
-    const int flags= c->flags;
95
-    int16_t **lumPixBuf= c->lumPixBuf;
96
-    int16_t **chrUPixBuf= c->chrUPixBuf;
97
-    int16_t **alpPixBuf= c->alpPixBuf;
98
-    const int vLumBufSize= c->vLumBufSize;
99
-    const int vChrBufSize= c->vChrBufSize;
100
-    int32_t *vLumFilterPos= c->vLumFilterPos;
101
-    int32_t *vChrFilterPos= c->vChrFilterPos;
102
-    int16_t *vLumFilter= c->vLumFilter;
103
-    int16_t *vChrFilter= c->vChrFilter;
104
-    int32_t *lumMmxFilter= c->lumMmxFilter;
105
-    int32_t *chrMmxFilter= c->chrMmxFilter;
106
-    int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
107
-    const int vLumFilterSize= c->vLumFilterSize;
108
-    const int vChrFilterSize= c->vChrFilterSize;
109
-    const int chrDstY= dstY>>c->chrDstVSubSample;
110
-    const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
111
-    const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
112
-
113
-    c->blueDither= ff_dither8[dstY&1];
114
-    if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555)
115
-        c->greenDither= ff_dither8[dstY&1];
116
-    else
117
-        c->greenDither= ff_dither4[dstY&1];
118
-    c->redDither= ff_dither8[(dstY+1)&1];
119
-    if (dstY < dstH - 2) {
120
-        const int16_t **lumSrcPtr= (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
121
-        const int16_t **chrUSrcPtr= (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
122
-        const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
123
-        int i;
124
-
125
-        if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
126
-            const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize;
127
-            int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
128
-            for (i = 0; i < neg;            i++)
129
-                tmpY[i] = lumSrcPtr[neg];
130
-            for (     ; i < end;            i++)
131
-                tmpY[i] = lumSrcPtr[i];
132
-            for (     ; i < vLumFilterSize; i++)
133
-                tmpY[i] = tmpY[i-1];
134
-            lumSrcPtr = tmpY;
135
-
136
-            if (alpSrcPtr) {
137
-                const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize;
138
-                for (i = 0; i < neg;            i++)
139
-                    tmpA[i] = alpSrcPtr[neg];
140
-                for (     ; i < end;            i++)
141
-                    tmpA[i] = alpSrcPtr[i];
142
-                for (     ; i < vLumFilterSize; i++)
143
-                    tmpA[i] = tmpA[i - 1];
144
-                alpSrcPtr = tmpA;
145
-            }
146
-        }
147
-        if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
148
-            const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize;
149
-            int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
150
-            for (i = 0; i < neg;            i++) {
151
-                tmpU[i] = chrUSrcPtr[neg];
152
-            }
153
-            for (     ; i < end;            i++) {
154
-                tmpU[i] = chrUSrcPtr[i];
155
-            }
156
-            for (     ; i < vChrFilterSize; i++) {
157
-                tmpU[i] = tmpU[i - 1];
158
-            }
159
-            chrUSrcPtr = tmpU;
160
-        }
161
-
162
-        if (flags & SWS_ACCURATE_RND) {
163
-            int s= APCK_SIZE / 8;
164
-            for (i=0; i<vLumFilterSize; i+=2) {
165
-                *(const void**)&lumMmxFilter[s*i              ]= lumSrcPtr[i  ];
166
-                *(const void**)&lumMmxFilter[s*i+APCK_PTR2/4  ]= lumSrcPtr[i+(vLumFilterSize>1)];
167
-                lumMmxFilter[s*i+APCK_COEF/4  ]=
168
-                lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i    ]
169
-                + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
170
-                if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
171
-                    *(const void**)&alpMmxFilter[s*i              ]= alpSrcPtr[i  ];
172
-                    *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4  ]= alpSrcPtr[i+(vLumFilterSize>1)];
173
-                    alpMmxFilter[s*i+APCK_COEF/4  ]=
174
-                    alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4  ];
175
-                }
176
-            }
177
-            for (i=0; i<vChrFilterSize; i+=2) {
178
-                *(const void**)&chrMmxFilter[s*i              ]= chrUSrcPtr[i  ];
179
-                *(const void**)&chrMmxFilter[s*i+APCK_PTR2/4  ]= chrUSrcPtr[i+(vChrFilterSize>1)];
180
-                chrMmxFilter[s*i+APCK_COEF/4  ]=
181
-                chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i    ]
182
-                + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
183
-            }
184
-        } else {
185
-            for (i=0; i<vLumFilterSize; i++) {
186
-                *(const void**)&lumMmxFilter[4*i+0]= lumSrcPtr[i];
187
-                lumMmxFilter[4*i+2]=
188
-                lumMmxFilter[4*i+3]=
189
-                ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
190
-                if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
191
-                    *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
192
-                    alpMmxFilter[4*i+2]=
193
-                    alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
194
-                }
195
-            }
196
-            for (i=0; i<vChrFilterSize; i++) {
197
-                *(const void**)&chrMmxFilter[4*i+0]= chrUSrcPtr[i];
198
-                chrMmxFilter[4*i+2]=
199
-                chrMmxFilter[4*i+3]=
200
-                ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
201
-            }
202
-        }
203
-    }
204
-}
205
-
206
-#if HAVE_MMX2
207
-static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
208
-                           const int16_t **src, uint8_t *dest, int dstW,
209
-                           const uint8_t *dither, int offset)
210
-{
211
-    if(((int)dest) & 15){
212
-        return yuv2yuvX_MMX2(filter, filterSize, src, dest, dstW, dither, offset);
213
-    }
214
-    if (offset) {
215
-        __asm__ volatile("movq       (%0), %%xmm3\n\t"
216
-                         "movdqa    %%xmm3, %%xmm4\n\t"
217
-                         "psrlq       $24, %%xmm3\n\t"
218
-                         "psllq       $40, %%xmm4\n\t"
219
-                         "por       %%xmm4, %%xmm3\n\t"
220
-                         :: "r"(dither)
221
-                         );
222
-    } else {
223
-        __asm__ volatile("movq       (%0), %%xmm3\n\t"
224
-                         :: "r"(dither)
225
-                         );
226
-    }
227
-    __asm__ volatile(
228
-        "pxor      %%xmm0, %%xmm0\n\t"
229
-        "punpcklbw %%xmm0, %%xmm3\n\t"
230
-        "psraw        $4, %%xmm3\n\t"
231
-        "movdqa    %%xmm3, %%xmm4\n\t"
232
-        "movdqa    %%xmm3, %%xmm7\n\t"
233
-        "movl %3, %%ecx\n\t"
234
-        "mov                                 %0, %%"REG_d"  \n\t"\
235
-        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
236
-        ".p2align                             4             \n\t" /* FIXME Unroll? */\
237
-        "1:                                                 \n\t"\
238
-        "movddup                  8(%%"REG_d"), %%xmm0      \n\t" /* filterCoeff */\
239
-        "movdqa              (%%"REG_S", %%"REG_c", 2), %%xmm2      \n\t" /* srcData */\
240
-        "movdqa            16(%%"REG_S", %%"REG_c", 2), %%xmm5      \n\t" /* srcData */\
241
-        "add                                $16, %%"REG_d"  \n\t"\
242
-        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
243
-        "test                         %%"REG_S", %%"REG_S"  \n\t"\
244
-        "pmulhw                           %%xmm0, %%xmm2      \n\t"\
245
-        "pmulhw                           %%xmm0, %%xmm5      \n\t"\
246
-        "paddw                            %%xmm2, %%xmm3      \n\t"\
247
-        "paddw                            %%xmm5, %%xmm4      \n\t"\
248
-        " jnz                                1b             \n\t"\
249
-        "psraw                               $3, %%xmm3      \n\t"\
250
-        "psraw                               $3, %%xmm4      \n\t"\
251
-        "packuswb                         %%xmm4, %%xmm3      \n\t"
252
-        "movntdq                          %%xmm3, (%1, %%"REG_c")\n\t"
253
-        "add                         $16, %%"REG_c"         \n\t"\
254
-        "cmp                          %2, %%"REG_c"         \n\t"\
255
-        "movdqa    %%xmm7, %%xmm3\n\t"
256
-        "movdqa    %%xmm7, %%xmm4\n\t"
257
-        "mov                                 %0, %%"REG_d"  \n\t"\
258
-        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
259
-        "jb                                  1b             \n\t"\
260
-        :: "g" (filter),
261
-           "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset)
262
-        : "%"REG_d, "%"REG_S, "%"REG_c
263
-    );
264
-}
265
-#endif
266
-
267
-#endif /* HAVE_INLINE_ASM */
268
-
269
-#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
270
-extern void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
271
-                                                SwsContext *c, int16_t *data, \
272
-                                                int dstW, const uint8_t *src, \
273
-                                                const int16_t *filter, \
274
-                                                const int32_t *filterPos, int filterSize)
275
-
276
-#define SCALE_FUNCS(filter_n, opt) \
277
-    SCALE_FUNC(filter_n,  8, 15, opt); \
278
-    SCALE_FUNC(filter_n,  9, 15, opt); \
279
-    SCALE_FUNC(filter_n, 10, 15, opt); \
280
-    SCALE_FUNC(filter_n, 12, 15, opt); \
281
-    SCALE_FUNC(filter_n, 14, 15, opt); \
282
-    SCALE_FUNC(filter_n, 16, 15, opt); \
283
-    SCALE_FUNC(filter_n,  8, 19, opt); \
284
-    SCALE_FUNC(filter_n,  9, 19, opt); \
285
-    SCALE_FUNC(filter_n, 10, 19, opt); \
286
-    SCALE_FUNC(filter_n, 12, 19, opt); \
287
-    SCALE_FUNC(filter_n, 14, 19, opt); \
288
-    SCALE_FUNC(filter_n, 16, 19, opt)
289
-
290
-#define SCALE_FUNCS_MMX(opt) \
291
-    SCALE_FUNCS(4, opt); \
292
-    SCALE_FUNCS(8, opt); \
293
-    SCALE_FUNCS(X, opt)
294
-
295
-#define SCALE_FUNCS_SSE(opt) \
296
-    SCALE_FUNCS(4, opt); \
297
-    SCALE_FUNCS(8, opt); \
298
-    SCALE_FUNCS(X4, opt); \
299
-    SCALE_FUNCS(X8, opt)
300
-
301
-#if ARCH_X86_32
302
-SCALE_FUNCS_MMX(mmx);
303
-#endif
304
-SCALE_FUNCS_SSE(sse2);
305
-SCALE_FUNCS_SSE(ssse3);
306
-SCALE_FUNCS_SSE(sse4);
307
-
308
-#define VSCALEX_FUNC(size, opt) \
309
-extern void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filterSize, \
310
-                                               const int16_t **src, uint8_t *dest, int dstW, \
311
-                                               const uint8_t *dither, int offset)
312
-#define VSCALEX_FUNCS(opt) \
313
-    VSCALEX_FUNC(8,  opt); \
314
-    VSCALEX_FUNC(9,  opt); \
315
-    VSCALEX_FUNC(10, opt)
316
-
317
-#if ARCH_X86_32
318
-VSCALEX_FUNCS(mmx2);
319
-#endif
320
-VSCALEX_FUNCS(sse2);
321
-VSCALEX_FUNCS(sse4);
322
-VSCALEX_FUNC(16, sse4);
323
-VSCALEX_FUNCS(avx);
324
-
325
-#define VSCALE_FUNC(size, opt) \
326
-extern void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst, int dstW, \
327
-                                               const uint8_t *dither, int offset)
328
-#define VSCALE_FUNCS(opt1, opt2) \
329
-    VSCALE_FUNC(8,  opt1); \
330
-    VSCALE_FUNC(9,  opt2); \
331
-    VSCALE_FUNC(10, opt2); \
332
-    VSCALE_FUNC(16, opt1)
333
-
334
-#if ARCH_X86_32
335
-VSCALE_FUNCS(mmx, mmx2);
336
-#endif
337
-VSCALE_FUNCS(sse2, sse2);
338
-VSCALE_FUNC(16, sse4);
339
-VSCALE_FUNCS(avx, avx);
340
-
341
-#define INPUT_Y_FUNC(fmt, opt) \
342
-extern void ff_ ## fmt ## ToY_  ## opt(uint8_t *dst, const uint8_t *src, \
343
-                                       int w, uint32_t *unused)
344
-#define INPUT_UV_FUNC(fmt, opt) \
345
-extern void ff_ ## fmt ## ToUV_ ## opt(uint8_t *dstU, uint8_t *dstV, \
346
-                                       const uint8_t *src, const uint8_t *unused1, \
347
-                                       int w, uint32_t *unused2)
348
-#define INPUT_FUNC(fmt, opt) \
349
-    INPUT_Y_FUNC(fmt, opt); \
350
-    INPUT_UV_FUNC(fmt, opt)
351
-#define INPUT_FUNCS(opt) \
352
-    INPUT_FUNC(uyvy, opt); \
353
-    INPUT_FUNC(yuyv, opt); \
354
-    INPUT_UV_FUNC(nv12, opt); \
355
-    INPUT_UV_FUNC(nv21, opt); \
356
-    INPUT_FUNC(rgba, opt); \
357
-    INPUT_FUNC(bgra, opt); \
358
-    INPUT_FUNC(argb, opt); \
359
-    INPUT_FUNC(abgr, opt); \
360
-    INPUT_FUNC(rgb24, opt); \
361
-    INPUT_FUNC(bgr24, opt)
362
-
363
-#if ARCH_X86_32
364
-INPUT_FUNCS(mmx);
365
-#endif
366
-INPUT_FUNCS(sse2);
367
-INPUT_FUNCS(ssse3);
368
-INPUT_FUNCS(avx);
369
-
370
-void ff_sws_init_swScale_mmx(SwsContext *c)
371
-{
372
-    int cpu_flags = av_get_cpu_flags();
373
-
374
-#if HAVE_INLINE_ASM
375
-    if (cpu_flags & AV_CPU_FLAG_MMX)
376
-        sws_init_swScale_MMX(c);
377
-#if HAVE_MMX2
378
-    if (cpu_flags & AV_CPU_FLAG_MMX2)
379
-        sws_init_swScale_MMX2(c);
380
-    if (cpu_flags & AV_CPU_FLAG_SSE3){
381
-        if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND))
382
-            c->yuv2planeX = yuv2yuvX_sse3;
383
-    }
384
-#endif
385
-#endif /* HAVE_INLINE_ASM */
386
-
387
-#if HAVE_YASM
388
-#define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
389
-    if (c->srcBpc == 8) { \
390
-        hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
391
-                                     ff_hscale8to19_ ## filtersize ## _ ## opt1; \
392
-    } else if (c->srcBpc == 9) { \
393
-        hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
394
-                                     ff_hscale9to19_ ## filtersize ## _ ## opt1; \
395
-    } else if (c->srcBpc == 10) { \
396
-        hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
397
-                                     ff_hscale10to19_ ## filtersize ## _ ## opt1; \
398
-    } else if (c->srcBpc == 12) { \
399
-        hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \
400
-                                     ff_hscale12to19_ ## filtersize ## _ ## opt1; \
401
-    } else if (c->srcBpc == 14 || ((c->srcFormat==PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)) { \
402
-        hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \
403
-                                     ff_hscale14to19_ ## filtersize ## _ ## opt1; \
404
-    } else { /* c->srcBpc == 16 */ \
405
-        av_assert0(c->srcBpc == 16);\
406
-        hscalefn = c->dstBpc <= 14 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
407
-                                     ff_hscale16to19_ ## filtersize ## _ ## opt1; \
408
-    } \
409
-} while (0)
410
-#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
411
-    switch (filtersize) { \
412
-    case 4:  ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
413
-    case 8:  ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
414
-    default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \
415
-    }
416
-#define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \
417
-switch(c->dstBpc){ \
418
-    case 16:                          do_16_case;                          break; \
419
-    case 10: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
420
-    case 9:  if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_  ## opt; break; \
421
-    default: if (condition_8bit)    /*vscalefn = ff_yuv2planeX_8_  ## opt;*/ break; \
422
-    }
423
-#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \
424
-    switch(c->dstBpc){ \
425
-    case 16: if (!isBE(c->dstFormat))            vscalefn = ff_yuv2plane1_16_ ## opt1; break; \
426
-    case 10: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \
427
-    case 9:  if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_  ## opt2;  break; \
428
-    case 8:                                      vscalefn = ff_yuv2plane1_8_  ## opt1;  break; \
429
-    default: av_assert0(c->dstBpc>8); \
430
-    }
431
-#define case_rgb(x, X, opt) \
432
-        case PIX_FMT_ ## X: \
433
-            c->lumToYV12 = ff_ ## x ## ToY_ ## opt; \
434
-            if (!c->chrSrcHSubSample) \
435
-                c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
436
-            break
437
-#if ARCH_X86_32
438
-    if (cpu_flags & AV_CPU_FLAG_MMX) {
439
-        ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
440
-        ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
441
-        ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmx2, cpu_flags & AV_CPU_FLAG_MMX2);
442
-
443
-        switch (c->srcFormat) {
444
-        case PIX_FMT_Y400A:
445
-            c->lumToYV12 = ff_yuyvToY_mmx;
446
-            if (c->alpPixBuf)
447
-                c->alpToYV12 = ff_uyvyToY_mmx;
448
-            break;
449
-        case PIX_FMT_YUYV422:
450
-            c->lumToYV12 = ff_yuyvToY_mmx;
451
-            c->chrToYV12 = ff_yuyvToUV_mmx;
452
-            break;
453
-        case PIX_FMT_UYVY422:
454
-            c->lumToYV12 = ff_uyvyToY_mmx;
455
-            c->chrToYV12 = ff_uyvyToUV_mmx;
456
-            break;
457
-        case PIX_FMT_NV12:
458
-            c->chrToYV12 = ff_nv12ToUV_mmx;
459
-            break;
460
-        case PIX_FMT_NV21:
461
-            c->chrToYV12 = ff_nv21ToUV_mmx;
462
-            break;
463
-        case_rgb(rgb24, RGB24, mmx);
464
-        case_rgb(bgr24, BGR24, mmx);
465
-        case_rgb(bgra,  BGRA,  mmx);
466
-        case_rgb(rgba,  RGBA,  mmx);
467
-        case_rgb(abgr,  ABGR,  mmx);
468
-        case_rgb(argb,  ARGB,  mmx);
469
-        default:
470
-            break;
471
-        }
472
-    }
473
-    if (cpu_flags & AV_CPU_FLAG_MMX2) {
474
-        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmx2, , 1);
475
-    }
476
-#endif
477
-#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
478
-    switch (filtersize) { \
479
-    case 4:  ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
480
-    case 8:  ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
481
-    default: if (filtersize & 4) ASSIGN_SCALE_FUNC2(hscalefn, X4, opt1, opt2); \
482
-             else                ASSIGN_SCALE_FUNC2(hscalefn, X8, opt1, opt2); \
483
-             break; \
484
-    }
485
-    if (cpu_flags & AV_CPU_FLAG_SSE2) {
486
-        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse2, sse2);
487
-        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
488
-        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, ,
489
-                            HAVE_ALIGNED_STACK || ARCH_X86_64);
490
-        ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1);
491
-
492
-        switch (c->srcFormat) {
493
-        case PIX_FMT_Y400A:
494
-            c->lumToYV12 = ff_yuyvToY_sse2;
495
-            if (c->alpPixBuf)
496
-                c->alpToYV12 = ff_uyvyToY_sse2;
497
-            break;
498
-        case PIX_FMT_YUYV422:
499
-            c->lumToYV12 = ff_yuyvToY_sse2;
500
-            c->chrToYV12 = ff_yuyvToUV_sse2;
501
-            break;
502
-        case PIX_FMT_UYVY422:
503
-            c->lumToYV12 = ff_uyvyToY_sse2;
504
-            c->chrToYV12 = ff_uyvyToUV_sse2;
505
-            break;
506
-        case PIX_FMT_NV12:
507
-            c->chrToYV12 = ff_nv12ToUV_sse2;
508
-            break;
509
-        case PIX_FMT_NV21:
510
-            c->chrToYV12 = ff_nv21ToUV_sse2;
511
-            break;
512
-        case_rgb(rgb24, RGB24, sse2);
513
-        case_rgb(bgr24, BGR24, sse2);
514
-        case_rgb(bgra,  BGRA,  sse2);
515
-        case_rgb(rgba,  RGBA,  sse2);
516
-        case_rgb(abgr,  ABGR,  sse2);
517
-        case_rgb(argb,  ARGB,  sse2);
518
-        default:
519
-            break;
520
-        }
521
-    }
522
-    if (cpu_flags & AV_CPU_FLAG_SSSE3) {
523
-        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, ssse3, ssse3);
524
-        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, ssse3, ssse3);
525
-        switch (c->srcFormat) {
526
-        case_rgb(rgb24, RGB24, ssse3);
527
-        case_rgb(bgr24, BGR24, ssse3);
528
-        default:
529
-            break;
530
-        }
531
-    }
532
-    if (cpu_flags & AV_CPU_FLAG_SSE4) {
533
-        /* Xto15 don't need special sse4 functions */
534
-        ASSIGN_SSE_SCALE_FUNC(c->hyScale, c->hLumFilterSize, sse4, ssse3);
535
-        ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse4, ssse3);
536
-        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4,
537
-                            if (!isBE(c->dstFormat)) c->yuv2planeX = ff_yuv2planeX_16_sse4,
538
-                            HAVE_ALIGNED_STACK || ARCH_X86_64);
539
-        if (c->dstBpc == 16 && !isBE(c->dstFormat))
540
-            c->yuv2plane1 = ff_yuv2plane1_16_sse4;
541
-    }
542
-
543
-    if (HAVE_AVX && cpu_flags & AV_CPU_FLAG_AVX) {
544
-        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, ,
545
-                            HAVE_ALIGNED_STACK || ARCH_X86_64);
546
-        ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1);
547
-
548
-        switch (c->srcFormat) {
549
-        case PIX_FMT_YUYV422:
550
-            c->chrToYV12 = ff_yuyvToUV_avx;
551
-            break;
552
-        case PIX_FMT_UYVY422:
553
-            c->chrToYV12 = ff_uyvyToUV_avx;
554
-            break;
555
-        case PIX_FMT_NV12:
556
-            c->chrToYV12 = ff_nv12ToUV_avx;
557
-            break;
558
-        case PIX_FMT_NV21:
559
-            c->chrToYV12 = ff_nv21ToUV_avx;
560
-            break;
561
-        case_rgb(rgb24, RGB24, avx);
562
-        case_rgb(bgr24, BGR24, avx);
563
-        case_rgb(bgra,  BGRA,  avx);
564
-        case_rgb(rgba,  RGBA,  avx);
565
-        case_rgb(abgr,  ABGR,  avx);
566
-        case_rgb(argb,  ARGB,  avx);
567
-        default:
568
-            break;
569
-        }
570
-    }
571
-#endif
572
-}
573 1
new file mode 100644
... ...
@@ -0,0 +1,111 @@
0
+/*
1
+ * software YUV to RGB converter
2
+ *
3
+ * Copyright (C) 2009 Konstantin Shishkov
4
+ *
5
+ * MMX/MMX2 template stuff (needed for fast movntq support),
6
+ * 1,4,8bpp support and context / deglobalize stuff
7
+ * by Michael Niedermayer (michaelni@gmx.at)
8
+ *
9
+ * This file is part of FFmpeg.
10
+ *
11
+ * FFmpeg is free software; you can redistribute it and/or
12
+ * modify it under the terms of the GNU Lesser General Public
13
+ * License as published by the Free Software Foundation; either
14
+ * version 2.1 of the License, or (at your option) any later version.
15
+ *
16
+ * FFmpeg is distributed in the hope that it will be useful,
17
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19
+ * Lesser General Public License for more details.
20
+ *
21
+ * You should have received a copy of the GNU Lesser General Public
22
+ * License along with FFmpeg; if not, write to the Free Software
23
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24
+ */
25
+
26
+#include <stdio.h>
27
+#include <stdlib.h>
28
+#include <inttypes.h>
29
+#include <assert.h>
30
+
31
+#include "config.h"
32
+#include "libswscale/rgb2rgb.h"
33
+#include "libswscale/swscale.h"
34
+#include "libswscale/swscale_internal.h"
35
+#include "libavutil/attributes.h"
36
+#include "libavutil/x86_cpu.h"
37
+#include "libavutil/cpu.h"
38
+
39
+#if HAVE_INLINE_ASM
40
+
41
+#define DITHER1XBPP // only for MMX
42
+
43
+/* hope these constant values are cache line aligned */
44
+DECLARE_ASM_CONST(8, uint64_t, mmx_00ffw)   = 0x00ff00ff00ff00ffULL;
45
+DECLARE_ASM_CONST(8, uint64_t, mmx_redmask) = 0xf8f8f8f8f8f8f8f8ULL;
46
+DECLARE_ASM_CONST(8, uint64_t, mmx_grnmask) = 0xfcfcfcfcfcfcfcfcULL;
47
+DECLARE_ASM_CONST(8, uint64_t, pb_e0) = 0xe0e0e0e0e0e0e0e0ULL;
48
+DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL;
49
+DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
50
+
51
+//MMX versions
52
+#if HAVE_MMX
53
+#undef RENAME
54
+#undef COMPILE_TEMPLATE_MMX2
55
+#define COMPILE_TEMPLATE_MMX2 0
56
+#define RENAME(a) a ## _MMX
57
+#include "yuv2rgb_template.c"
58
+#endif /* HAVE_MMX */
59
+
60
+//MMX2 versions
61
+#if HAVE_MMX2
62
+#undef RENAME
63
+#undef COMPILE_TEMPLATE_MMX2
64
+#define COMPILE_TEMPLATE_MMX2 1
65
+#define RENAME(a) a ## _MMX2
66
+#include "yuv2rgb_template.c"
67
+#endif /* HAVE_MMX2 */
68
+
69
+#endif /* HAVE_INLINE_ASM */
70
+
71
+av_cold SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
72
+{
73
+#if HAVE_INLINE_ASM
74
+    int cpu_flags = av_get_cpu_flags();
75
+
76
+#if HAVE_MMX2
77
+    if (cpu_flags & AV_CPU_FLAG_MMX2) {
78
+        switch (c->dstFormat) {
79
+        case PIX_FMT_RGB24:  return yuv420_rgb24_MMX2;
80
+        case PIX_FMT_BGR24:  return yuv420_bgr24_MMX2;
81
+        }
82
+    }
83
+#endif
84
+
85
+    if (cpu_flags & AV_CPU_FLAG_MMX) {
86
+        switch (c->dstFormat) {
87
+            case PIX_FMT_RGB32:
88
+                if (c->srcFormat == PIX_FMT_YUVA420P) {
89
+#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
90
+                    return yuva420_rgb32_MMX;
91
+#endif
92
+                    break;
93
+                } else return yuv420_rgb32_MMX;
94
+            case PIX_FMT_BGR32:
95
+                if (c->srcFormat == PIX_FMT_YUVA420P) {
96
+#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
97
+                    return yuva420_bgr32_MMX;
98
+#endif
99
+                    break;
100
+                } else return yuv420_bgr32_MMX;
101
+            case PIX_FMT_RGB24:  return yuv420_rgb24_MMX;
102
+            case PIX_FMT_BGR24:  return yuv420_bgr24_MMX;
103
+            case PIX_FMT_RGB565: return yuv420_rgb16_MMX;
104
+            case PIX_FMT_RGB555: return yuv420_rgb15_MMX;
105
+        }
106
+    }
107
+#endif /* HAVE_INLINE_ASM */
108
+
109
+    return NULL;
110
+}
0 111
deleted file mode 100644
... ...
@@ -1,110 +0,0 @@
1
-/*
2
- * software YUV to RGB converter
3
- *
4
- * Copyright (C) 2009 Konstantin Shishkov
5
- *
6
- * MMX/MMX2 template stuff (needed for fast movntq support),
7
- * 1,4,8bpp support and context / deglobalize stuff
8
- * by Michael Niedermayer (michaelni@gmx.at)
9
- *
10
- * This file is part of FFmpeg.
11
- *
12
- * FFmpeg is free software; you can redistribute it and/or
13
- * modify it under the terms of the GNU Lesser General Public
14
- * License as published by the Free Software Foundation; either
15
- * version 2.1 of the License, or (at your option) any later version.
16
- *
17
- * FFmpeg is distributed in the hope that it will be useful,
18
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20
- * Lesser General Public License for more details.
21
- *
22
- * You should have received a copy of the GNU Lesser General Public
23
- * License along with FFmpeg; if not, write to the Free Software
24
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25
- */
26
-
27
-#include <stdio.h>
28
-#include <stdlib.h>
29
-#include <inttypes.h>
30
-#include <assert.h>
31
-
32
-#include "config.h"
33
-#include "libswscale/rgb2rgb.h"
34
-#include "libswscale/swscale.h"
35
-#include "libswscale/swscale_internal.h"
36
-#include "libavutil/x86_cpu.h"
37
-#include "libavutil/cpu.h"
38
-
39
-#if HAVE_INLINE_ASM
40
-
41
-#define DITHER1XBPP // only for MMX
42
-
43
-/* hope these constant values are cache line aligned */
44
-DECLARE_ASM_CONST(8, uint64_t, mmx_00ffw)   = 0x00ff00ff00ff00ffULL;
45
-DECLARE_ASM_CONST(8, uint64_t, mmx_redmask) = 0xf8f8f8f8f8f8f8f8ULL;
46
-DECLARE_ASM_CONST(8, uint64_t, mmx_grnmask) = 0xfcfcfcfcfcfcfcfcULL;
47
-DECLARE_ASM_CONST(8, uint64_t, pb_e0) = 0xe0e0e0e0e0e0e0e0ULL;
48
-DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL;
49
-DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
50
-
51
-//MMX versions
52
-#if HAVE_MMX
53
-#undef RENAME
54
-#undef COMPILE_TEMPLATE_MMX2
55
-#define COMPILE_TEMPLATE_MMX2 0
56
-#define RENAME(a) a ## _MMX
57
-#include "yuv2rgb_template.c"
58
-#endif /* HAVE_MMX */
59
-
60
-//MMX2 versions
61
-#if HAVE_MMX2
62
-#undef RENAME
63
-#undef COMPILE_TEMPLATE_MMX2
64
-#define COMPILE_TEMPLATE_MMX2 1
65
-#define RENAME(a) a ## _MMX2
66
-#include "yuv2rgb_template.c"
67
-#endif /* HAVE_MMX2 */
68
-
69
-#endif /* HAVE_INLINE_ASM */
70
-
71
-SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
72
-{
73
-#if HAVE_INLINE_ASM
74
-    int cpu_flags = av_get_cpu_flags();
75
-
76
-#if HAVE_MMX2
77
-    if (cpu_flags & AV_CPU_FLAG_MMX2) {
78
-        switch (c->dstFormat) {
79
-        case PIX_FMT_RGB24:  return yuv420_rgb24_MMX2;
80
-        case PIX_FMT_BGR24:  return yuv420_bgr24_MMX2;
81
-        }
82
-    }
83
-#endif
84
-
85
-    if (cpu_flags & AV_CPU_FLAG_MMX) {
86
-        switch (c->dstFormat) {
87
-            case PIX_FMT_RGB32:
88
-                if (c->srcFormat == PIX_FMT_YUVA420P) {
89
-#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
90
-                    return yuva420_rgb32_MMX;
91
-#endif
92
-                    break;
93
-                } else return yuv420_rgb32_MMX;
94
-            case PIX_FMT_BGR32:
95
-                if (c->srcFormat == PIX_FMT_YUVA420P) {
96
-#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
97
-                    return yuva420_bgr32_MMX;
98
-#endif
99
-                    break;
100
-                } else return yuv420_bgr32_MMX;
101
-            case PIX_FMT_RGB24:  return yuv420_rgb24_MMX;
102
-            case PIX_FMT_BGR24:  return yuv420_bgr24_MMX;
103
-            case PIX_FMT_RGB565: return yuv420_rgb16_MMX;
104
-            case PIX_FMT_RGB555: return yuv420_rgb15_MMX;
105
-        }
106
-    }
107
-#endif /* HAVE_INLINE_ASM */
108
-
109
-    return NULL;
110
-}
... ...
@@ -36,6 +36,9 @@ fate-vc1_sa10091: CMD = framecrc -i $(SAMPLES)/vc1/SA10091.vc1
36 36
 FATE_VC1 += fate-vc1_sa20021
37 37
 fate-vc1_sa20021: CMD = framecrc -i $(SAMPLES)/vc1/SA20021.vc1
38 38
 
39
+#FATE_VC1 += fate-vc1_sa10143
40
+fate-vc1_sa10143: CMD = framecrc -i $(SAMPLES)/vc1/SA10143.vc1
41
+
39 42
 FATE_VC1 += fate-vc1-ism
40 43
 fate-vc1-ism: CMD = framecrc -i $(SAMPLES)/isom/vc1-wmapro.ism -an
41 44
 
42 45
new file mode 100644
... ...
@@ -0,0 +1,31 @@
0
+#tb 0: 1/25
1
+0,          0,          0,        1,   518400, 0x89407f55
2
+0,          2,          2,        1,   518400, 0xeb8d84a1
3
+0,          3,          3,        1,   518400, 0x2121ff57
4
+0,          4,          4,        1,   518400, 0xd81adb3d
5
+0,          5,          5,        1,   518400, 0x01e36aa2
6
+0,          6,          6,        1,   518400, 0x6b802361
7
+0,          7,          7,        1,   518400, 0xc8403c77
8
+0,          8,          8,        1,   518400, 0xdd342b5d
9
+0,          9,          9,        1,   518400, 0x2100eea5
10
+0,         10,         10,        1,   518400, 0x92a22da6
11
+0,         11,         11,        1,   518400, 0x6bacdef7
12
+0,         12,         12,        1,   518400, 0x4a00715f
13
+0,         13,         13,        1,   518400, 0x59b98727
14
+0,         14,         14,        1,   518400, 0xbf912ee1
15
+0,         15,         15,        1,   518400, 0x8c966cd6
16
+0,         16,         16,        1,   518400, 0x2c9a2535
17
+0,         17,         17,        1,   518400, 0x29085c06
18
+0,         18,         18,        1,   518400, 0x46ae6b7d
19
+0,         19,         19,        1,   518400, 0x283100f4
20
+0,         20,         20,        1,   518400, 0x2731b5ff
21
+0,         21,         21,        1,   518400, 0x1132ea54
22
+0,         22,         22,        1,   518400, 0x37cbe539
23
+0,         23,         23,        1,   518400, 0x08ff75cf
24
+0,         24,         24,        1,   518400, 0xafb6bc45
25
+0,         25,         25,        1,   518400, 0x19d3873d
26
+0,         26,         26,        1,   518400, 0xd494a8be
27
+0,         27,         27,        1,   518400, 0x285f41ef
28
+0,         28,         28,        1,   518400, 0xd4b1ffa1
29
+0,         29,         29,        1,   518400, 0xc3876c3a
30
+0,         30,         30,        1,   518400, 0xb73dbb62