Browse code

Merge remote-tracking branch 'qatar/master'

* qatar/master:
rtmp: Add support for SWFVerification
api-example: use new video encoding API.
x86: avcodec: Appropriately name files containing only init functions
mpegvideo_mmx_template: drop some commented-out cruft
libavresample: add mix level normalization option
w32pthreads: Add missing #includes to make header compile standalone
rtmp: Gracefully ignore _checkbw errors by tracking them
rtmp: Do not send _checkbw calls as notifications
prores: interlaced ProRes encoding

Conflicts:
doc/examples/decoding_encoding.c
libavcodec/proresenc_kostya.c
libavcodec/w32pthreads.h
libavcodec/x86/Makefile
libavformat/version.h

Merged-by: Michael Niedermayer <michaelni@gmx.at>

Michael Niedermayer authored on 2012/08/15 22:41:01
Showing 20 changed files
... ...
@@ -31,10 +31,10 @@
31 31
 
32 32
 #include <math.h>
33 33
 
34
-#include <libavutil/imgutils.h>
35 34
 #include <libavutil/opt.h>
36 35
 #include <libavcodec/avcodec.h>
37 36
 #include <libavutil/audioconvert.h>
37
+#include <libavutil/imgutils.h>
38 38
 #include <libavutil/mathematics.h>
39 39
 #include <libavutil/samplefmt.h>
40 40
 
... ...
@@ -315,11 +315,11 @@ static void video_encode_example(const char *filename, int codec_id)
315 315
 {
316 316
     AVCodec *codec;
317 317
     AVCodecContext *c= NULL;
318
-    int i, out_size, x, y, outbuf_size;
318
+    int i, ret, x, y, got_output;
319 319
     FILE *f;
320 320
     AVFrame *picture;
321
-    uint8_t *outbuf;
322
-    int had_output=0;
321
+    AVPacket pkt;
322
+    uint8_t endcode[] = { 0, 0, 1, 0xb7 };
323 323
 
324 324
     printf("Encode video file %s\n", filename);
325 325
 
... ...
@@ -359,17 +359,25 @@ static void video_encode_example(const char *filename, int codec_id)
359 359
         exit(1);
360 360
     }
361 361
 
362
-    /* alloc image and output buffer */
363
-    outbuf_size = 100000 + 12*c->width*c->height;
364
-    outbuf = malloc(outbuf_size);
365
-
366 362
     /* the image can be allocated by any means and av_image_alloc() is
367 363
      * just the most convenient way if av_malloc() is to be used */
368
-    av_image_alloc(picture->data, picture->linesize,
369
-                   c->width, c->height, c->pix_fmt, 1);
364
+    ret = av_image_alloc(picture->data, picture->linesize, c->width, c->height,
365
+                         c->pix_fmt, 32);
366
+    if (ret < 0) {
367
+        fprintf(stderr, "could not alloc raw picture buffer\n");
368
+        exit(1);
369
+    }
370
+
371
+    picture->format = c->pix_fmt;
372
+    picture->width  = c->width;
373
+    picture->height = c->height;
370 374
 
371 375
     /* encode 1 second of video */
372 376
     for(i=0;i<25;i++) {
377
+        av_init_packet(&pkt);
378
+        pkt.data = NULL;    // packet data will be allocated by the encoder
379
+        pkt.size = 0;
380
+
373 381
         fflush(stdout);
374 382
         /* prepare a dummy image */
375 383
         /* Y */
... ...
@@ -387,35 +395,46 @@ static void video_encode_example(const char *filename, int codec_id)
387 387
             }
388 388
         }
389 389
 
390
+        picture->pts = i;
391
+
390 392
         /* encode the image */
391
-        out_size = avcodec_encode_video(c, outbuf, outbuf_size, picture);
392
-        had_output |= out_size;
393
-        printf("encoding frame %3d (size=%5d)\n", i, out_size);
394
-        fwrite(outbuf, 1, out_size, f);
393
+        ret = avcodec_encode_video2(c, &pkt, picture, &got_output);
394
+        if (ret < 0) {
395
+            fprintf(stderr, "error encoding frame\n");
396
+            exit(1);
397
+        }
398
+
399
+        if (got_output) {
400
+            printf("encoding frame %3d (size=%5d)\n", i, pkt.size);
401
+            fwrite(pkt.data, 1, pkt.size, f);
402
+            av_free_packet(&pkt);
403
+        }
395 404
     }
396 405
 
397 406
     /* get the delayed frames */
398
-    for(; out_size || !had_output; i++) {
407
+    for (got_output = 1; got_output; i++) {
399 408
         fflush(stdout);
400 409
 
401
-        out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
402
-        had_output |= out_size;
403
-        printf("write frame %3d (size=%5d)\n", i, out_size);
404
-        fwrite(outbuf, 1, out_size, f);
410
+        ret = avcodec_encode_video2(c, &pkt, NULL, &got_output);
411
+        if (ret < 0) {
412
+            fprintf(stderr, "error encoding frame\n");
413
+            exit(1);
414
+        }
415
+
416
+        if (got_output) {
417
+            printf("write frame %3d (size=%5d)\n", i, pkt.size);
418
+            fwrite(pkt.data, 1, pkt.size, f);
419
+            av_free_packet(&pkt);
420
+        }
405 421
     }
406 422
 
407 423
     /* add sequence end code to have a real mpeg file */
408
-    outbuf[0] = 0x00;
409
-    outbuf[1] = 0x00;
410
-    outbuf[2] = 0x01;
411
-    outbuf[3] = 0xb7;
412
-    fwrite(outbuf, 1, 4, f);
424
+    fwrite(endcode, 1, sizeof(endcode), f);
413 425
     fclose(f);
414
-    free(outbuf);
415 426
 
416 427
     avcodec_close(c);
417 428
     av_free(c);
418
-    av_free(picture->data[0]);
429
+    av_freep(&picture->data[0]);
419 430
     av_free(picture);
420 431
     printf("\n");
421 432
 }
... ...
@@ -272,6 +272,12 @@ Name of live stream to subscribe to. By default no value will be sent.
272 272
 It is only sent if the option is specified or if rtmp_live
273 273
 is set to live.
274 274
 
275
+@item rtmp_swfhash
276
+SHA256 hash of the decompressed SWF file (32 bytes).
277
+
278
+@item rtmp_swfsize
279
+Size of the decompressed SWF file, required for SWFVerification.
280
+
275 281
 @item rtmp_swfurl
276 282
 URL of the SWF player for the media. By default no value will be sent.
277 283
 
... ...
@@ -406,10 +406,15 @@ static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
406 406
     int total_size = 0;
407 407
     const uint16_t *src;
408 408
     int slice_width_factor = av_log2(mbs_per_slice);
409
-    int num_cblocks, pwidth, linesize, line_offset;
409
+    int num_cblocks, pwidth, linesize, line_add;
410 410
     int plane_factor, is_chroma;
411 411
     uint16_t *qmat;
412 412
 
413
+    if (ctx->pictures_per_frame == 1)
414
+        line_add = 0;
415
+    else
416
+        line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
417
+
413 418
     if (ctx->force_quant) {
414 419
         qmat = ctx->quants[0];
415 420
     } else if (quant < MAX_STORED_Q) {
... ...
@@ -437,15 +442,14 @@ static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
437 437
             pwidth      = avctx->width >> 1;
438 438
         }
439 439
 
440
-        line_offset = ((ctx->cur_picture_idx ^ !pic->top_field_first) &
441
-                       (ctx->pictures_per_frame - 1)) * pic->linesize[i];
442 440
         linesize = pic->linesize[i] * ctx->pictures_per_frame;
443
-        src = (const uint16_t*)(pic->data[i] + yp * linesize + line_offset) + xp;
441
+        src = (const uint16_t*)(pic->data[i] + yp * linesize +
442
+                                line_add * pic->linesize[i]) + xp;
444 443
 
445 444
         get_slice_data(ctx, src, linesize, xp, yp,
446 445
                        pwidth, avctx->height / ctx->pictures_per_frame,
447
-                       ctx->blocks[0], ctx->emu_buf, mbs_per_slice,
448
-                       num_cblocks, is_chroma);
446
+                       ctx->blocks[0], ctx->emu_buf,
447
+                       mbs_per_slice, num_cblocks, is_chroma);
449 448
         sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
450 449
                                       mbs_per_slice, ctx->blocks[0],
451 450
                                       num_cblocks, plane_factor,
... ...
@@ -579,8 +583,12 @@ static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
579 579
     int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
580 580
     int overquant;
581 581
     uint16_t *qmat;
582
-    int linesize[4], line_offset;
582
+    int linesize[4], line_add;
583 583
 
584
+    if (ctx->pictures_per_frame == 1)
585
+        line_add = 0;
586
+    else
587
+        line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
584 588
     mbs = x + mbs_per_slice;
585 589
 
586 590
     for (i = 0; i < ctx->num_planes; i++) {
... ...
@@ -600,15 +608,14 @@ static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
600 600
             pwidth         = avctx->width >> 1;
601 601
         }
602 602
 
603
-        line_offset = ((ctx->cur_picture_idx ^ !pic->top_field_first) &
604
-                       (ctx->pictures_per_frame - 1)) * pic->linesize[i];
605 603
         linesize[i] = pic->linesize[i] * ctx->pictures_per_frame;
606
-        src = (const uint16_t*)(pic->data[i] + yp * linesize[i] + line_offset) + xp;
604
+        src = (const uint16_t*)(pic->data[i] + yp * linesize[i] +
605
+                                line_add * pic->linesize[i]) + xp;
607 606
 
608 607
         get_slice_data(ctx, src, linesize[i], xp, yp,
609 608
                        pwidth, avctx->height / ctx->pictures_per_frame,
610
-                       td->blocks[i], td->emu_buf, mbs_per_slice,
611
-                       num_cblocks[i], is_chroma[i]);
609
+                       td->blocks[i], td->emu_buf,
610
+                       mbs_per_slice, num_cblocks[i], is_chroma[i]);
612 611
     }
613 612
 
614 613
     for (q = min_quant; q < max_quant + 2; q++) {
... ...
@@ -767,9 +774,8 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
767 767
     bytestream_put_be16  (&buf, avctx->height);
768 768
 
769 769
     frame_flags = ctx->chroma_factor << 6;
770
-    if (avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
770
+    if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
771 771
         frame_flags |= pic->top_field_first ? 0x04 : 0x08;
772
-    }
773 772
     bytestream_put_byte  (&buf, frame_flags);
774 773
 
775 774
     bytestream_put_byte  (&buf, 0);             // reserved
... ...
@@ -791,7 +797,9 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
791 791
     }
792 792
     bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size
793 793
 
794
-    for (ctx->cur_picture_idx = 0; ctx->cur_picture_idx < ctx->pictures_per_frame; ++ctx->cur_picture_idx) {
794
+    for (ctx->cur_picture_idx = 0;
795
+         ctx->cur_picture_idx < ctx->pictures_per_frame;
796
+         ctx->cur_picture_idx++) {
795 797
         // picture header
796 798
         picture_size_pos = buf + 1;
797 799
         bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
... ...
@@ -845,7 +853,6 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
845 845
     frame_size = buf - orig_buf;
846 846
     bytestream_put_be32(&orig_buf, frame_size);
847 847
 
848
-
849 848
     pkt->size   = frame_size;
850 849
     pkt->flags |= AV_PKT_FLAG_KEY;
851 850
     *got_packet = 1;
... ...
@@ -927,7 +934,8 @@ static av_cold int encode_init(AVCodecContext *avctx)
927 927
     if (!ctx->force_quant) {
928 928
         if (!ctx->bits_per_mb) {
929 929
             for (i = 0; i < NUM_MB_LIMITS - 1; i++)
930
-                if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height * ctx->pictures_per_frame)
930
+                if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
931
+                                           ctx->pictures_per_frame)
931 932
                     break;
932 933
             ctx->bits_per_mb   = ctx->profile_info->br_tab[i];
933 934
         } else if (ctx->bits_per_mb < 128) {
... ...
@@ -991,12 +999,15 @@ static av_cold int encode_init(AVCodecContext *avctx)
991 991
     ctx->frame_size_upper_bound = ctx->pictures_per_frame *
992 992
                                   ctx->slices_per_picture *
993 993
                                   (2 + 2 * ctx->num_planes +
994
-                                   (mps * ctx->bits_per_mb) / 8) + 200;
994
+                                   (mps * ctx->bits_per_mb) / 8)
995
+                                  + 200;
995 996
 
996 997
     avctx->codec_tag   = ctx->profile_info->tag;
997 998
 
998
-    av_log(avctx, AV_LOG_DEBUG, "profile %d, %d slices/pic, %d pics/frame, %d bits per MB\n",
999
-           ctx->profile, ctx->slices_per_picture, ctx->pictures_per_frame, ctx->bits_per_mb);
999
+    av_log(avctx, AV_LOG_DEBUG,
1000
+           "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1001
+           ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1002
+           interlaced ? "yes" : "no", ctx->bits_per_mb);
1000 1003
     av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1001 1004
            ctx->frame_size_upper_bound);
1002 1005
 
... ...
@@ -40,6 +40,8 @@
40 40
 #include <process.h>
41 41
 
42 42
 #include "libavutil/common.h"
43
+#include "libavutil/internal.h"
44
+#include "libavutil/mem.h"
43 45
 
44 46
 typedef struct {
45 47
     void *handle;
... ...
@@ -5,7 +5,7 @@ OBJS-$(CONFIG_XMM_CLOBBER_TEST)        += x86/w64xmmtest.o
5 5
 
6 6
 MMX-OBJS                               += x86/dsputil_mmx.o             \
7 7
                                           x86/fdct_mmx.o                \
8
-                                          x86/fmtconvert_mmx.o          \
8
+                                          x86/fmtconvert_init.o         \
9 9
                                           x86/idct_mmx_xvid.o           \
10 10
                                           x86/idct_sse2_xvid.o          \
11 11
                                           x86/motion_est_mmx.o          \
... ...
@@ -13,15 +13,15 @@ MMX-OBJS                               += x86/dsputil_mmx.o             \
13 13
                                           x86/simple_idct_mmx.o         \
14 14
 
15 15
 MMX-OBJS-$(CONFIG_AAC_DECODER)         += x86/sbrdsp_init.o
16
-MMX-OBJS-$(CONFIG_AC3DSP)              += x86/ac3dsp_mmx.o
16
+MMX-OBJS-$(CONFIG_AC3DSP)              += x86/ac3dsp_init.o
17 17
 MMX-OBJS-$(CONFIG_CAVS_DECODER)        += x86/cavsdsp_mmx.o
18 18
 MMX-OBJS-$(CONFIG_DNXHD_ENCODER)       += x86/dnxhd_mmx.o
19 19
 MMX-OBJS-$(CONFIG_DWT)                 += x86/snowdsp_mmx.o \
20 20
                                           x86/dwt.o
21 21
 MMX-OBJS-$(CONFIG_ENCODERS)            += x86/dsputilenc_mmx.o
22
-MMX-OBJS-$(CONFIG_FFT)                 += x86/fft.o
22
+MMX-OBJS-$(CONFIG_FFT)                 += x86/fft_init.o
23 23
 MMX-OBJS-$(CONFIG_GPL)                 += x86/idct_mmx.o
24
-MMX-OBJS-$(CONFIG_H264DSP)             += x86/h264dsp_mmx.o
24
+MMX-OBJS-$(CONFIG_H264DSP)             += x86/h264dsp_init.o
25 25
 MMX-OBJS-$(CONFIG_H264PRED)            += x86/h264_intrapred_init.o
26 26
 MMX-OBJS-$(CONFIG_LPC)                 += x86/lpc_mmx.o
27 27
 MMX-OBJS-$(CONFIG_MPEGAUDIODSP)        += x86/mpegaudiodec_mmx.o
28 28
new file mode 100644
... ...
@@ -0,0 +1,93 @@
0
+/*
1
+ * x86-optimized AC-3 DSP utils
2
+ * Copyright (c) 2011 Justin Ruggles
3
+ *
4
+ * This file is part of FFmpeg.
5
+ *
6
+ * FFmpeg is free software; you can redistribute it and/or
7
+ * modify it under the terms of the GNU Lesser General Public
8
+ * License as published by the Free Software Foundation; either
9
+ * version 2.1 of the License, or (at your option) any later version.
10
+ *
11
+ * FFmpeg is distributed in the hope that it will be useful,
12
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
+ * Lesser General Public License for more details.
15
+ *
16
+ * You should have received a copy of the GNU Lesser General Public
17
+ * License along with FFmpeg; if not, write to the Free Software
18
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
+ */
20
+
21
+#include "libavutil/x86/asm.h"
22
+#include "dsputil_mmx.h"
23
+#include "libavcodec/ac3dsp.h"
24
+
25
+extern void ff_ac3_exponent_min_mmx   (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
26
+extern void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
27
+extern void ff_ac3_exponent_min_sse2  (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
28
+
29
+extern int ff_ac3_max_msb_abs_int16_mmx  (const int16_t *src, int len);
30
+extern int ff_ac3_max_msb_abs_int16_mmx2 (const int16_t *src, int len);
31
+extern int ff_ac3_max_msb_abs_int16_sse2 (const int16_t *src, int len);
32
+extern int ff_ac3_max_msb_abs_int16_ssse3(const int16_t *src, int len);
33
+
34
+extern void ff_ac3_lshift_int16_mmx (int16_t *src, unsigned int len, unsigned int shift);
35
+extern void ff_ac3_lshift_int16_sse2(int16_t *src, unsigned int len, unsigned int shift);
36
+
37
+extern void ff_ac3_rshift_int32_mmx (int32_t *src, unsigned int len, unsigned int shift);
38
+extern void ff_ac3_rshift_int32_sse2(int32_t *src, unsigned int len, unsigned int shift);
39
+
40
+extern void ff_float_to_fixed24_3dnow(int32_t *dst, const float *src, unsigned int len);
41
+extern void ff_float_to_fixed24_sse  (int32_t *dst, const float *src, unsigned int len);
42
+extern void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned int len);
43
+
44
+extern int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]);
45
+
46
+extern void ff_ac3_extract_exponents_3dnow(uint8_t *exp, int32_t *coef, int nb_coefs);
47
+extern void ff_ac3_extract_exponents_sse2 (uint8_t *exp, int32_t *coef, int nb_coefs);
48
+extern void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_coefs);
49
+
50
+av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
51
+{
52
+#if HAVE_YASM
53
+    int mm_flags = av_get_cpu_flags();
54
+
55
+    if (mm_flags & AV_CPU_FLAG_MMX) {
56
+        c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
57
+        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx;
58
+        c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx;
59
+        c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx;
60
+    }
61
+    if (mm_flags & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW) {
62
+        c->extract_exponents = ff_ac3_extract_exponents_3dnow;
63
+        if (!bit_exact) {
64
+            c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
65
+        }
66
+    }
67
+    if (mm_flags & AV_CPU_FLAG_MMXEXT && HAVE_MMXEXT) {
68
+        c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
69
+        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx2;
70
+    }
71
+    if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) {
72
+        c->float_to_fixed24 = ff_float_to_fixed24_sse;
73
+    }
74
+    if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) {
75
+        c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
76
+        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
77
+        c->float_to_fixed24 = ff_float_to_fixed24_sse2;
78
+        c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
79
+        c->extract_exponents = ff_ac3_extract_exponents_sse2;
80
+        if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
81
+            c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
82
+            c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
83
+        }
84
+    }
85
+    if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSSE3) {
86
+        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
87
+        if (!(mm_flags & AV_CPU_FLAG_ATOM)) {
88
+            c->extract_exponents = ff_ac3_extract_exponents_ssse3;
89
+        }
90
+    }
91
+#endif
92
+}
0 93
deleted file mode 100644
... ...
@@ -1,93 +0,0 @@
1
-/*
2
- * x86-optimized AC-3 DSP utils
3
- * Copyright (c) 2011 Justin Ruggles
4
- *
5
- * This file is part of FFmpeg.
6
- *
7
- * FFmpeg is free software; you can redistribute it and/or
8
- * modify it under the terms of the GNU Lesser General Public
9
- * License as published by the Free Software Foundation; either
10
- * version 2.1 of the License, or (at your option) any later version.
11
- *
12
- * FFmpeg is distributed in the hope that it will be useful,
13
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
- * Lesser General Public License for more details.
16
- *
17
- * You should have received a copy of the GNU Lesser General Public
18
- * License along with FFmpeg; if not, write to the Free Software
19
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
- */
21
-
22
-#include "libavutil/x86/asm.h"
23
-#include "dsputil_mmx.h"
24
-#include "libavcodec/ac3dsp.h"
25
-
26
-extern void ff_ac3_exponent_min_mmx   (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
27
-extern void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
28
-extern void ff_ac3_exponent_min_sse2  (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
29
-
30
-extern int ff_ac3_max_msb_abs_int16_mmx  (const int16_t *src, int len);
31
-extern int ff_ac3_max_msb_abs_int16_mmx2 (const int16_t *src, int len);
32
-extern int ff_ac3_max_msb_abs_int16_sse2 (const int16_t *src, int len);
33
-extern int ff_ac3_max_msb_abs_int16_ssse3(const int16_t *src, int len);
34
-
35
-extern void ff_ac3_lshift_int16_mmx (int16_t *src, unsigned int len, unsigned int shift);
36
-extern void ff_ac3_lshift_int16_sse2(int16_t *src, unsigned int len, unsigned int shift);
37
-
38
-extern void ff_ac3_rshift_int32_mmx (int32_t *src, unsigned int len, unsigned int shift);
39
-extern void ff_ac3_rshift_int32_sse2(int32_t *src, unsigned int len, unsigned int shift);
40
-
41
-extern void ff_float_to_fixed24_3dnow(int32_t *dst, const float *src, unsigned int len);
42
-extern void ff_float_to_fixed24_sse  (int32_t *dst, const float *src, unsigned int len);
43
-extern void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned int len);
44
-
45
-extern int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]);
46
-
47
-extern void ff_ac3_extract_exponents_3dnow(uint8_t *exp, int32_t *coef, int nb_coefs);
48
-extern void ff_ac3_extract_exponents_sse2 (uint8_t *exp, int32_t *coef, int nb_coefs);
49
-extern void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_coefs);
50
-
51
-av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
52
-{
53
-#if HAVE_YASM
54
-    int mm_flags = av_get_cpu_flags();
55
-
56
-    if (mm_flags & AV_CPU_FLAG_MMX) {
57
-        c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
58
-        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx;
59
-        c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx;
60
-        c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx;
61
-    }
62
-    if (mm_flags & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW) {
63
-        c->extract_exponents = ff_ac3_extract_exponents_3dnow;
64
-        if (!bit_exact) {
65
-            c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
66
-        }
67
-    }
68
-    if (mm_flags & AV_CPU_FLAG_MMXEXT && HAVE_MMXEXT) {
69
-        c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
70
-        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx2;
71
-    }
72
-    if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) {
73
-        c->float_to_fixed24 = ff_float_to_fixed24_sse;
74
-    }
75
-    if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) {
76
-        c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
77
-        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
78
-        c->float_to_fixed24 = ff_float_to_fixed24_sse2;
79
-        c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
80
-        c->extract_exponents = ff_ac3_extract_exponents_sse2;
81
-        if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
82
-            c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
83
-            c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
84
-        }
85
-    }
86
-    if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSSE3) {
87
-        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
88
-        if (!(mm_flags & AV_CPU_FLAG_ATOM)) {
89
-            c->extract_exponents = ff_ac3_extract_exponents_ssse3;
90
-        }
91
-    }
92
-#endif
93
-}
94 1
deleted file mode 100644
... ...
@@ -1,72 +0,0 @@
1
-/*
2
- * This file is part of FFmpeg.
3
- *
4
- * FFmpeg is free software; you can redistribute it and/or
5
- * modify it under the terms of the GNU Lesser General Public
6
- * License as published by the Free Software Foundation; either
7
- * version 2.1 of the License, or (at your option) any later version.
8
- *
9
- * FFmpeg is distributed in the hope that it will be useful,
10
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
- * Lesser General Public License for more details.
13
- *
14
- * You should have received a copy of the GNU Lesser General Public
15
- * License along with FFmpeg; if not, write to the Free Software
16
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
- */
18
-
19
-#include "libavutil/cpu.h"
20
-#include "libavcodec/dsputil.h"
21
-#include "libavcodec/dct.h"
22
-#include "fft.h"
23
-
24
-av_cold void ff_fft_init_mmx(FFTContext *s)
25
-{
26
-#if HAVE_YASM
27
-    int has_vectors = av_get_cpu_flags();
28
-#if ARCH_X86_32
29
-    if (has_vectors & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW) {
30
-        /* 3DNow! for K6-2/3 */
31
-        s->imdct_calc = ff_imdct_calc_3dnow;
32
-        s->imdct_half = ff_imdct_half_3dnow;
33
-        s->fft_calc   = ff_fft_calc_3dnow;
34
-    }
35
-    if (has_vectors & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT) {
36
-        /* 3DNowEx for K7 */
37
-        s->imdct_calc = ff_imdct_calc_3dnowext;
38
-        s->imdct_half = ff_imdct_half_3dnowext;
39
-        s->fft_calc   = ff_fft_calc_3dnowext;
40
-    }
41
-#endif
42
-    if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) {
43
-        /* SSE for P3/P4/K8 */
44
-        s->imdct_calc  = ff_imdct_calc_sse;
45
-        s->imdct_half  = ff_imdct_half_sse;
46
-        s->fft_permute = ff_fft_permute_sse;
47
-        s->fft_calc    = ff_fft_calc_sse;
48
-        s->fft_permutation = FF_FFT_PERM_SWAP_LSBS;
49
-    }
50
-    if (has_vectors & AV_CPU_FLAG_AVX && HAVE_AVX && s->nbits >= 5) {
51
-        /* AVX for SB */
52
-        s->imdct_half      = ff_imdct_half_avx;
53
-        s->fft_calc        = ff_fft_calc_avx;
54
-        s->fft_permutation = FF_FFT_PERM_AVX;
55
-    }
56
-#endif
57
-}
58
-
59
-#if CONFIG_DCT
60
-av_cold void ff_dct_init_mmx(DCTContext *s)
61
-{
62
-#if HAVE_YASM
63
-    int has_vectors = av_get_cpu_flags();
64
-    if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE)
65
-        s->dct32 = ff_dct32_float_sse;
66
-    if (has_vectors & AV_CPU_FLAG_SSE2 && HAVE_SSE)
67
-        s->dct32 = ff_dct32_float_sse2;
68
-    if (has_vectors & AV_CPU_FLAG_AVX && HAVE_AVX)
69
-        s->dct32 = ff_dct32_float_avx;
70
-#endif
71
-}
72
-#endif
73 1
new file mode 100644
... ...
@@ -0,0 +1,72 @@
0
+/*
1
+ * This file is part of FFmpeg.
2
+ *
3
+ * FFmpeg is free software; you can redistribute it and/or
4
+ * modify it under the terms of the GNU Lesser General Public
5
+ * License as published by the Free Software Foundation; either
6
+ * version 2.1 of the License, or (at your option) any later version.
7
+ *
8
+ * FFmpeg is distributed in the hope that it will be useful,
9
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
+ * Lesser General Public License for more details.
12
+ *
13
+ * You should have received a copy of the GNU Lesser General Public
14
+ * License along with FFmpeg; if not, write to the Free Software
15
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+ */
17
+
18
+#include "libavutil/cpu.h"
19
+#include "libavcodec/dsputil.h"
20
+#include "libavcodec/dct.h"
21
+#include "fft.h"
22
+
23
+av_cold void ff_fft_init_mmx(FFTContext *s)
24
+{
25
+#if HAVE_YASM
26
+    int has_vectors = av_get_cpu_flags();
27
+#if ARCH_X86_32
28
+    if (has_vectors & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW) {
29
+        /* 3DNow! for K6-2/3 */
30
+        s->imdct_calc = ff_imdct_calc_3dnow;
31
+        s->imdct_half = ff_imdct_half_3dnow;
32
+        s->fft_calc   = ff_fft_calc_3dnow;
33
+    }
34
+    if (has_vectors & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT) {
35
+        /* 3DNowEx for K7 */
36
+        s->imdct_calc = ff_imdct_calc_3dnowext;
37
+        s->imdct_half = ff_imdct_half_3dnowext;
38
+        s->fft_calc   = ff_fft_calc_3dnowext;
39
+    }
40
+#endif
41
+    if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) {
42
+        /* SSE for P3/P4/K8 */
43
+        s->imdct_calc  = ff_imdct_calc_sse;
44
+        s->imdct_half  = ff_imdct_half_sse;
45
+        s->fft_permute = ff_fft_permute_sse;
46
+        s->fft_calc    = ff_fft_calc_sse;
47
+        s->fft_permutation = FF_FFT_PERM_SWAP_LSBS;
48
+    }
49
+    if (has_vectors & AV_CPU_FLAG_AVX && HAVE_AVX && s->nbits >= 5) {
50
+        /* AVX for SB */
51
+        s->imdct_half      = ff_imdct_half_avx;
52
+        s->fft_calc        = ff_fft_calc_avx;
53
+        s->fft_permutation = FF_FFT_PERM_AVX;
54
+    }
55
+#endif
56
+}
57
+
58
+#if CONFIG_DCT
59
+av_cold void ff_dct_init_mmx(DCTContext *s)
60
+{
61
+#if HAVE_YASM
62
+    int has_vectors = av_get_cpu_flags();
63
+    if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE)
64
+        s->dct32 = ff_dct32_float_sse;
65
+    if (has_vectors & AV_CPU_FLAG_SSE2 && HAVE_SSE)
66
+        s->dct32 = ff_dct32_float_sse2;
67
+    if (has_vectors & AV_CPU_FLAG_AVX && HAVE_AVX)
68
+        s->dct32 = ff_dct32_float_avx;
69
+#endif
70
+}
71
+#endif
0 72
new file mode 100644
... ...
@@ -0,0 +1,147 @@
0
+/*
1
+ * Format Conversion Utils
2
+ * Copyright (c) 2000, 2001 Fabrice Bellard
3
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
4
+ *
5
+ * This file is part of FFmpeg.
6
+ *
7
+ * FFmpeg is free software; you can redistribute it and/or
8
+ * modify it under the terms of the GNU Lesser General Public
9
+ * License as published by the Free Software Foundation; either
10
+ * version 2.1 of the License, or (at your option) any later version.
11
+ *
12
+ * FFmpeg is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
+ * Lesser General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU Lesser General Public
18
+ * License along with FFmpeg; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ *
21
+ * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
22
+ */
23
+
24
+#include "libavutil/cpu.h"
25
+#include "libavutil/x86/asm.h"
26
+#include "libavcodec/fmtconvert.h"
27
+#include "libavcodec/dsputil.h"
28
+
29
+#if HAVE_YASM
30
+
31
+void ff_int32_to_float_fmul_scalar_sse (float *dst, const int *src, float mul, int len);
32
+void ff_int32_to_float_fmul_scalar_sse2(float *dst, const int *src, float mul, int len);
33
+
34
+void ff_float_to_int16_3dnow(int16_t *dst, const float *src, long len);
35
+void ff_float_to_int16_sse  (int16_t *dst, const float *src, long len);
36
+void ff_float_to_int16_sse2 (int16_t *dst, const float *src, long len);
37
+
38
+void ff_float_to_int16_step_3dnow(int16_t *dst, const float *src, long len, long step);
39
+void ff_float_to_int16_step_sse  (int16_t *dst, const float *src, long len, long step);
40
+void ff_float_to_int16_step_sse2 (int16_t *dst, const float *src, long len, long step);
41
+
42
+void ff_float_to_int16_interleave2_3dnow(int16_t *dst, const float **src, long len);
43
+void ff_float_to_int16_interleave2_sse  (int16_t *dst, const float **src, long len);
44
+void ff_float_to_int16_interleave2_sse2 (int16_t *dst, const float **src, long len);
45
+
46
+void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);
47
+void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);
48
+void ff_float_to_int16_interleave6_3dnowext(int16_t *dst, const float **src, int len);
49
+
50
+#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
51
+
52
+#define FLOAT_TO_INT16_INTERLEAVE(cpu) \
53
+/* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\
54
+static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
55
+    int c;\
56
+    for(c=0; c<channels; c++){\
57
+        ff_float_to_int16_step_##cpu(dst+c, src[c], len, channels);\
58
+    }\
59
+}\
60
+\
61
+static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, long len, int channels){\
62
+    if(channels==1)\
63
+        ff_float_to_int16_##cpu(dst, src[0], len);\
64
+    else if(channels==2){\
65
+        ff_float_to_int16_interleave2_##cpu(dst, src, len);\
66
+    }else if(channels==6){\
67
+        ff_float_to_int16_interleave6_##cpu(dst, src, len);\
68
+    }else\
69
+        float_to_int16_interleave_misc_##cpu(dst, src, len, channels);\
70
+}
71
+
72
+FLOAT_TO_INT16_INTERLEAVE(3dnow)
73
+FLOAT_TO_INT16_INTERLEAVE(sse)
74
+FLOAT_TO_INT16_INTERLEAVE(sse2)
75
+
76
+static void float_to_int16_interleave_3dnowext(int16_t *dst, const float **src,
77
+                                               long len, int channels)
78
+{
79
+    if(channels==6)
80
+        ff_float_to_int16_interleave6_3dnowext(dst, src, len);
81
+    else
82
+        float_to_int16_interleave_3dnow(dst, src, len, channels);
83
+}
84
+
85
+void ff_float_interleave2_mmx(float *dst, const float **src, unsigned int len);
86
+void ff_float_interleave2_sse(float *dst, const float **src, unsigned int len);
87
+
88
+void ff_float_interleave6_mmx(float *dst, const float **src, unsigned int len);
89
+void ff_float_interleave6_sse(float *dst, const float **src, unsigned int len);
90
+
91
+static void float_interleave_mmx(float *dst, const float **src,
92
+                                 unsigned int len, int channels)
93
+{
94
+    if (channels == 2) {
95
+        ff_float_interleave2_mmx(dst, src, len);
96
+    } else if (channels == 6)
97
+        ff_float_interleave6_mmx(dst, src, len);
98
+    else
99
+        ff_float_interleave_c(dst, src, len, channels);
100
+}
101
+
102
+static void float_interleave_sse(float *dst, const float **src,
103
+                                 unsigned int len, int channels)
104
+{
105
+    if (channels == 2) {
106
+        ff_float_interleave2_sse(dst, src, len);
107
+    } else if (channels == 6)
108
+        ff_float_interleave6_sse(dst, src, len);
109
+    else
110
+        ff_float_interleave_c(dst, src, len, channels);
111
+}
112
+#endif
113
+
114
+void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
115
+{
116
+#if HAVE_YASM
117
+    int mm_flags = av_get_cpu_flags();
118
+
119
+    if (mm_flags & AV_CPU_FLAG_MMX) {
120
+        c->float_interleave = float_interleave_mmx;
121
+
122
+        if (HAVE_AMD3DNOW && mm_flags & AV_CPU_FLAG_3DNOW) {
123
+            if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
124
+                c->float_to_int16 = ff_float_to_int16_3dnow;
125
+                c->float_to_int16_interleave = float_to_int16_interleave_3dnow;
126
+            }
127
+        }
128
+        if (HAVE_AMD3DNOWEXT && mm_flags & AV_CPU_FLAG_3DNOWEXT) {
129
+            if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
130
+                c->float_to_int16_interleave = float_to_int16_interleave_3dnowext;
131
+            }
132
+        }
133
+        if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE) {
134
+            c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse;
135
+            c->float_to_int16 = ff_float_to_int16_sse;
136
+            c->float_to_int16_interleave = float_to_int16_interleave_sse;
137
+            c->float_interleave = float_interleave_sse;
138
+        }
139
+        if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE2) {
140
+            c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse2;
141
+            c->float_to_int16 = ff_float_to_int16_sse2;
142
+            c->float_to_int16_interleave = float_to_int16_interleave_sse2;
143
+        }
144
+    }
145
+#endif
146
+}
0 147
deleted file mode 100644
... ...
@@ -1,147 +0,0 @@
1
-/*
2
- * Format Conversion Utils
3
- * Copyright (c) 2000, 2001 Fabrice Bellard
4
- * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5
- *
6
- * This file is part of FFmpeg.
7
- *
8
- * FFmpeg is free software; you can redistribute it and/or
9
- * modify it under the terms of the GNU Lesser General Public
10
- * License as published by the Free Software Foundation; either
11
- * version 2.1 of the License, or (at your option) any later version.
12
- *
13
- * FFmpeg is distributed in the hope that it will be useful,
14
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
- * Lesser General Public License for more details.
17
- *
18
- * You should have received a copy of the GNU Lesser General Public
19
- * License along with FFmpeg; if not, write to the Free Software
20
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
- *
22
- * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
23
- */
24
-
25
-#include "libavutil/cpu.h"
26
-#include "libavutil/x86/asm.h"
27
-#include "libavcodec/fmtconvert.h"
28
-#include "libavcodec/dsputil.h"
29
-
30
-#if HAVE_YASM
31
-
32
-void ff_int32_to_float_fmul_scalar_sse (float *dst, const int *src, float mul, int len);
33
-void ff_int32_to_float_fmul_scalar_sse2(float *dst, const int *src, float mul, int len);
34
-
35
-void ff_float_to_int16_3dnow(int16_t *dst, const float *src, long len);
36
-void ff_float_to_int16_sse  (int16_t *dst, const float *src, long len);
37
-void ff_float_to_int16_sse2 (int16_t *dst, const float *src, long len);
38
-
39
-void ff_float_to_int16_step_3dnow(int16_t *dst, const float *src, long len, long step);
40
-void ff_float_to_int16_step_sse  (int16_t *dst, const float *src, long len, long step);
41
-void ff_float_to_int16_step_sse2 (int16_t *dst, const float *src, long len, long step);
42
-
43
-void ff_float_to_int16_interleave2_3dnow(int16_t *dst, const float **src, long len);
44
-void ff_float_to_int16_interleave2_sse  (int16_t *dst, const float **src, long len);
45
-void ff_float_to_int16_interleave2_sse2 (int16_t *dst, const float **src, long len);
46
-
47
-void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);
48
-void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);
49
-void ff_float_to_int16_interleave6_3dnowext(int16_t *dst, const float **src, int len);
50
-
51
-#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
52
-
53
-#define FLOAT_TO_INT16_INTERLEAVE(cpu) \
54
-/* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\
55
-static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
56
-    int c;\
57
-    for(c=0; c<channels; c++){\
58
-        ff_float_to_int16_step_##cpu(dst+c, src[c], len, channels);\
59
-    }\
60
-}\
61
-\
62
-static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, long len, int channels){\
63
-    if(channels==1)\
64
-        ff_float_to_int16_##cpu(dst, src[0], len);\
65
-    else if(channels==2){\
66
-        ff_float_to_int16_interleave2_##cpu(dst, src, len);\
67
-    }else if(channels==6){\
68
-        ff_float_to_int16_interleave6_##cpu(dst, src, len);\
69
-    }else\
70
-        float_to_int16_interleave_misc_##cpu(dst, src, len, channels);\
71
-}
72
-
73
-FLOAT_TO_INT16_INTERLEAVE(3dnow)
74
-FLOAT_TO_INT16_INTERLEAVE(sse)
75
-FLOAT_TO_INT16_INTERLEAVE(sse2)
76
-
77
-static void float_to_int16_interleave_3dnowext(int16_t *dst, const float **src,
78
-                                               long len, int channels)
79
-{
80
-    if(channels==6)
81
-        ff_float_to_int16_interleave6_3dnowext(dst, src, len);
82
-    else
83
-        float_to_int16_interleave_3dnow(dst, src, len, channels);
84
-}
85
-
86
-void ff_float_interleave2_mmx(float *dst, const float **src, unsigned int len);
87
-void ff_float_interleave2_sse(float *dst, const float **src, unsigned int len);
88
-
89
-void ff_float_interleave6_mmx(float *dst, const float **src, unsigned int len);
90
-void ff_float_interleave6_sse(float *dst, const float **src, unsigned int len);
91
-
92
-static void float_interleave_mmx(float *dst, const float **src,
93
-                                 unsigned int len, int channels)
94
-{
95
-    if (channels == 2) {
96
-        ff_float_interleave2_mmx(dst, src, len);
97
-    } else if (channels == 6)
98
-        ff_float_interleave6_mmx(dst, src, len);
99
-    else
100
-        ff_float_interleave_c(dst, src, len, channels);
101
-}
102
-
103
-static void float_interleave_sse(float *dst, const float **src,
104
-                                 unsigned int len, int channels)
105
-{
106
-    if (channels == 2) {
107
-        ff_float_interleave2_sse(dst, src, len);
108
-    } else if (channels == 6)
109
-        ff_float_interleave6_sse(dst, src, len);
110
-    else
111
-        ff_float_interleave_c(dst, src, len, channels);
112
-}
113
-#endif
114
-
115
-void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
116
-{
117
-#if HAVE_YASM
118
-    int mm_flags = av_get_cpu_flags();
119
-
120
-    if (mm_flags & AV_CPU_FLAG_MMX) {
121
-        c->float_interleave = float_interleave_mmx;
122
-
123
-        if (HAVE_AMD3DNOW && mm_flags & AV_CPU_FLAG_3DNOW) {
124
-            if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
125
-                c->float_to_int16 = ff_float_to_int16_3dnow;
126
-                c->float_to_int16_interleave = float_to_int16_interleave_3dnow;
127
-            }
128
-        }
129
-        if (HAVE_AMD3DNOWEXT && mm_flags & AV_CPU_FLAG_3DNOWEXT) {
130
-            if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
131
-                c->float_to_int16_interleave = float_to_int16_interleave_3dnowext;
132
-            }
133
-        }
134
-        if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE) {
135
-            c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse;
136
-            c->float_to_int16 = ff_float_to_int16_sse;
137
-            c->float_to_int16_interleave = float_to_int16_interleave_sse;
138
-            c->float_interleave = float_interleave_sse;
139
-        }
140
-        if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE2) {
141
-            c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse2;
142
-            c->float_to_int16 = ff_float_to_int16_sse2;
143
-            c->float_to_int16_interleave = float_to_int16_interleave_sse2;
144
-        }
145
-    }
146
-#endif
147
-}
148 1
new file mode 100644
... ...
@@ -0,0 +1,385 @@
0
+/*
1
+ * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
2
+ *
3
+ * This file is part of FFmpeg.
4
+ *
5
+ * FFmpeg is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * FFmpeg is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with FFmpeg; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#include "libavutil/cpu.h"
21
+#include "libavutil/x86/asm.h"
22
+#include "libavcodec/h264dsp.h"
23
+#include "dsputil_mmx.h"
24
+
25
+/***********************************/
26
+/* IDCT */
27
+#define IDCT_ADD_FUNC(NUM, DEPTH, OPT)                                  \
28
+void ff_h264_idct ## NUM ## _add_ ## DEPTH ## _ ## OPT(uint8_t *dst,    \
29
+                                                       int16_t *block,  \
30
+                                                       int stride);
31
+
32
+IDCT_ADD_FUNC(, 8, mmx)
33
+IDCT_ADD_FUNC(, 10, sse2)
34
+IDCT_ADD_FUNC(_dc, 8, mmx2)
35
+IDCT_ADD_FUNC(_dc, 10, mmx2)
36
+IDCT_ADD_FUNC(8_dc, 8, mmx2)
37
+IDCT_ADD_FUNC(8_dc, 10, sse2)
38
+IDCT_ADD_FUNC(8, 8, mmx)
39
+IDCT_ADD_FUNC(8, 8, sse2)
40
+IDCT_ADD_FUNC(8, 10, sse2)
41
+#if HAVE_AVX
42
+IDCT_ADD_FUNC(, 10, avx)
43
+IDCT_ADD_FUNC(8_dc, 10, avx)
44
+IDCT_ADD_FUNC(8, 10, avx)
45
+#endif
46
+
47
+
48
+#define IDCT_ADD_REP_FUNC(NUM, REP, DEPTH, OPT)                         \
49
+void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT       \
50
+    (uint8_t *dst, const int *block_offset,                             \
51
+     DCTELEM *block, int stride, const uint8_t nnzc[6 * 8]);
52
+
53
+IDCT_ADD_REP_FUNC(8, 4, 8, mmx)
54
+IDCT_ADD_REP_FUNC(8, 4, 8, mmx2)
55
+IDCT_ADD_REP_FUNC(8, 4, 8, sse2)
56
+IDCT_ADD_REP_FUNC(8, 4, 10, sse2)
57
+IDCT_ADD_REP_FUNC(8, 4, 10, avx)
58
+IDCT_ADD_REP_FUNC(, 16, 8, mmx)
59
+IDCT_ADD_REP_FUNC(, 16, 8, mmx2)
60
+IDCT_ADD_REP_FUNC(, 16, 8, sse2)
61
+IDCT_ADD_REP_FUNC(, 16, 10, sse2)
62
+IDCT_ADD_REP_FUNC(, 16intra, 8, mmx)
63
+IDCT_ADD_REP_FUNC(, 16intra, 8, mmx2)
64
+IDCT_ADD_REP_FUNC(, 16intra, 8, sse2)
65
+IDCT_ADD_REP_FUNC(, 16intra, 10, sse2)
66
+#if HAVE_AVX
67
+IDCT_ADD_REP_FUNC(, 16, 10, avx)
68
+IDCT_ADD_REP_FUNC(, 16intra, 10, avx)
69
+#endif
70
+
71
+
72
+#define IDCT_ADD_REP_FUNC2(NUM, REP, DEPTH, OPT)                      \
73
+void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT     \
74
+    (uint8_t **dst, const int *block_offset,                          \
75
+     DCTELEM *block, int stride, const uint8_t nnzc[6 * 8]);
76
+
77
+IDCT_ADD_REP_FUNC2(, 8, 8, mmx)
78
+IDCT_ADD_REP_FUNC2(, 8, 8, mmx2)
79
+IDCT_ADD_REP_FUNC2(, 8, 8, sse2)
80
+IDCT_ADD_REP_FUNC2(, 8, 10, sse2)
81
+#if HAVE_AVX
82
+IDCT_ADD_REP_FUNC2(, 8, 10, avx)
83
+#endif
84
+
85
+void ff_h264_luma_dc_dequant_idct_mmx(DCTELEM *output, DCTELEM *input, int qmul);
86
+void ff_h264_luma_dc_dequant_idct_sse2(DCTELEM *output, DCTELEM *input, int qmul);
87
+
88
+/***********************************/
89
+/* deblocking */
90
+
91
+void ff_h264_loop_filter_strength_mmx2(int16_t bS[2][4][4], uint8_t nnz[40],
92
+                                       int8_t ref[2][40], int16_t mv[2][40][2],
93
+                                       int bidir, int edges, int step,
94
+                                       int mask_mv0, int mask_mv1, int field);
95
+
96
+#define LF_FUNC(DIR, TYPE, DEPTH, OPT)                                        \
97
+void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT(uint8_t *pix,  \
98
+                                                               int stride,    \
99
+                                                               int alpha,     \
100
+                                                               int beta,      \
101
+                                                               int8_t *tc0);
102
+#define LF_IFUNC(DIR, TYPE, DEPTH, OPT) \
103
+void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT(uint8_t *pix,  \
104
+                                                               int stride,    \
105
+                                                               int alpha,     \
106
+                                                               int beta);
107
+
108
+#define LF_FUNCS(type, depth)                   \
109
+LF_FUNC(h,  chroma,       depth, mmx2)          \
110
+LF_IFUNC(h, chroma_intra, depth, mmx2)          \
111
+LF_FUNC(v,  chroma,       depth, mmx2)          \
112
+LF_IFUNC(v, chroma_intra, depth, mmx2)          \
113
+LF_FUNC(h,  luma,         depth, mmx2)          \
114
+LF_IFUNC(h, luma_intra,   depth, mmx2)          \
115
+LF_FUNC(h,  luma,         depth, sse2)          \
116
+LF_IFUNC(h, luma_intra,   depth, sse2)          \
117
+LF_FUNC(v,  luma,         depth, sse2)          \
118
+LF_IFUNC(v, luma_intra,   depth, sse2)          \
119
+LF_FUNC(h,  chroma,       depth, sse2)          \
120
+LF_IFUNC(h, chroma_intra, depth, sse2)          \
121
+LF_FUNC(v,  chroma,       depth, sse2)          \
122
+LF_IFUNC(v, chroma_intra, depth, sse2)          \
123
+LF_FUNC(h,  luma,         depth, avx)           \
124
+LF_IFUNC(h, luma_intra,   depth, avx)           \
125
+LF_FUNC(v,  luma,         depth, avx)           \
126
+LF_IFUNC(v, luma_intra,   depth, avx)           \
127
+LF_FUNC(h,  chroma,       depth, avx)           \
128
+LF_IFUNC(h, chroma_intra, depth, avx)           \
129
+LF_FUNC(v,  chroma,       depth, avx)           \
130
+LF_IFUNC(v, chroma_intra, depth, avx)
131
+
132
+LF_FUNCS(uint8_t,   8)
133
+LF_FUNCS(uint16_t, 10)
134
+
135
+#if ARCH_X86_32 && HAVE_YASM
136
+LF_FUNC(v8, luma, 8, mmx2)
137
+static void ff_deblock_v_luma_8_mmx2(uint8_t *pix, int stride, int alpha,
138
+                                     int beta, int8_t *tc0)
139
+{
140
+    if ((tc0[0] & tc0[1]) >= 0)
141
+        ff_deblock_v8_luma_8_mmx2(pix + 0, stride, alpha, beta, tc0);
142
+    if ((tc0[2] & tc0[3]) >= 0)
143
+        ff_deblock_v8_luma_8_mmx2(pix + 8, stride, alpha, beta, tc0 + 2);
144
+}
145
+
146
+LF_IFUNC(v8, luma_intra, 8, mmx2)
147
+static void ff_deblock_v_luma_intra_8_mmx2(uint8_t *pix, int stride,
148
+                                           int alpha, int beta)
149
+{
150
+    ff_deblock_v8_luma_intra_8_mmx2(pix + 0, stride, alpha, beta);
151
+    ff_deblock_v8_luma_intra_8_mmx2(pix + 8, stride, alpha, beta);
152
+}
153
+#endif /* ARCH_X86_32 */
154
+
155
+LF_FUNC(v,  luma,       10, mmx2)
156
+LF_IFUNC(v, luma_intra, 10, mmx2)
157
+
158
+/***********************************/
159
+/* weighted prediction */
160
+
161
+#define H264_WEIGHT(W, OPT)                                             \
162
+void ff_h264_weight_ ## W ## _ ## OPT(uint8_t *dst, int stride,         \
163
+                                      int height, int log2_denom,       \
164
+                                      int weight, int offset);
165
+
166
+#define H264_BIWEIGHT(W, OPT)                                           \
167
+void ff_h264_biweight_ ## W ## _ ## OPT(uint8_t *dst, uint8_t *src,     \
168
+                                        int stride, int height,         \
169
+                                        int log2_denom, int weightd,    \
170
+                                        int weights, int offset);
171
+
172
+#define H264_BIWEIGHT_MMX(W)                    \
173
+    H264_WEIGHT(W, mmx2)                        \
174
+    H264_BIWEIGHT(W, mmx2)
175
+
176
+#define H264_BIWEIGHT_MMX_SSE(W)                \
177
+    H264_BIWEIGHT_MMX(W)                        \
178
+    H264_WEIGHT(W, sse2)                        \
179
+    H264_BIWEIGHT(W, sse2)                      \
180
+    H264_BIWEIGHT(W, ssse3)
181
+
182
+H264_BIWEIGHT_MMX_SSE(16)
183
+H264_BIWEIGHT_MMX_SSE(8)
184
+H264_BIWEIGHT_MMX(4)
185
+
186
+#define H264_WEIGHT_10(W, DEPTH, OPT)                                   \
187
+void ff_h264_weight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst,       \
188
+                                                    int stride,         \
189
+                                                    int height,         \
190
+                                                    int log2_denom,     \
191
+                                                    int weight,         \
192
+                                                    int offset);
193
+
194
+#define H264_BIWEIGHT_10(W, DEPTH, OPT)                                 \
195
+void ff_h264_biweight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst,     \
196
+                                                      uint8_t *src,     \
197
+                                                      int stride,       \
198
+                                                      int height,       \
199
+                                                      int log2_denom,   \
200
+                                                      int weightd,      \
201
+                                                      int weights,      \
202
+                                                      int offset);
203
+
204
+#define H264_BIWEIGHT_10_SSE(W, DEPTH)          \
205
+    H264_WEIGHT_10(W, DEPTH, sse2)              \
206
+    H264_WEIGHT_10(W, DEPTH, sse4)              \
207
+    H264_BIWEIGHT_10(W, DEPTH, sse2)            \
208
+    H264_BIWEIGHT_10(W, DEPTH, sse4)
209
+
210
+H264_BIWEIGHT_10_SSE(16, 10)
211
+H264_BIWEIGHT_10_SSE(8,  10)
212
+H264_BIWEIGHT_10_SSE(4,  10)
213
+
214
+void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
215
+                         const int chroma_format_idc)
216
+{
217
+#if HAVE_YASM
218
+    int mm_flags = av_get_cpu_flags();
219
+
220
+    if (chroma_format_idc == 1 && mm_flags & AV_CPU_FLAG_MMXEXT)
221
+        c->h264_loop_filter_strength = ff_h264_loop_filter_strength_mmx2;
222
+
223
+    if (bit_depth == 8) {
224
+        if (mm_flags & AV_CPU_FLAG_MMX) {
225
+            c->h264_idct_dc_add   =
226
+            c->h264_idct_add      = ff_h264_idct_add_8_mmx;
227
+            c->h264_idct8_dc_add  =
228
+            c->h264_idct8_add     = ff_h264_idct8_add_8_mmx;
229
+
230
+            c->h264_idct_add16 = ff_h264_idct_add16_8_mmx;
231
+            c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx;
232
+            if (chroma_format_idc == 1)
233
+                c->h264_idct_add8 = ff_h264_idct_add8_8_mmx;
234
+            c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx;
235
+            if (mm_flags & AV_CPU_FLAG_CMOV)
236
+                c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx;
237
+
238
+            if (mm_flags & AV_CPU_FLAG_MMXEXT) {
239
+                c->h264_idct_dc_add  = ff_h264_idct_dc_add_8_mmx2;
240
+                c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmx2;
241
+                c->h264_idct_add16   = ff_h264_idct_add16_8_mmx2;
242
+                c->h264_idct8_add4   = ff_h264_idct8_add4_8_mmx2;
243
+                if (chroma_format_idc == 1)
244
+                    c->h264_idct_add8 = ff_h264_idct_add8_8_mmx2;
245
+                c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx2;
246
+
247
+                c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_8_mmx2;
248
+                c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_mmx2;
249
+                if (chroma_format_idc == 1) {
250
+                    c->h264_h_loop_filter_chroma       = ff_deblock_h_chroma_8_mmx2;
251
+                    c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_mmx2;
252
+                }
253
+#if ARCH_X86_32
254
+                c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_mmx2;
255
+                c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_mmx2;
256
+                c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmx2;
257
+                c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmx2;
258
+#endif /* ARCH_X86_32 */
259
+                c->weight_h264_pixels_tab[0] = ff_h264_weight_16_mmx2;
260
+                c->weight_h264_pixels_tab[1] = ff_h264_weight_8_mmx2;
261
+                c->weight_h264_pixels_tab[2] = ff_h264_weight_4_mmx2;
262
+
263
+                c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_mmx2;
264
+                c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_mmx2;
265
+                c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_mmx2;
266
+
267
+                if (mm_flags & AV_CPU_FLAG_SSE2) {
268
+                    c->h264_idct8_add  = ff_h264_idct8_add_8_sse2;
269
+
270
+                    c->h264_idct_add16 = ff_h264_idct_add16_8_sse2;
271
+                    c->h264_idct8_add4 = ff_h264_idct8_add4_8_sse2;
272
+                    if (chroma_format_idc == 1)
273
+                        c->h264_idct_add8 = ff_h264_idct_add8_8_sse2;
274
+                    c->h264_idct_add16intra      = ff_h264_idct_add16intra_8_sse2;
275
+                    c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_sse2;
276
+
277
+                    c->weight_h264_pixels_tab[0] = ff_h264_weight_16_sse2;
278
+                    c->weight_h264_pixels_tab[1] = ff_h264_weight_8_sse2;
279
+
280
+                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_sse2;
281
+                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_sse2;
282
+
283
+#if HAVE_ALIGNED_STACK
284
+                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_sse2;
285
+                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_sse2;
286
+                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2;
287
+                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
288
+#endif /* HAVE_ALIGNED_STACK */
289
+                }
290
+                if (mm_flags & AV_CPU_FLAG_SSSE3) {
291
+                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3;
292
+                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_ssse3;
293
+                }
294
+                if (HAVE_AVX && mm_flags & AV_CPU_FLAG_AVX) {
295
+#if HAVE_ALIGNED_STACK
296
+                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_avx;
297
+                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_avx;
298
+                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx;
299
+                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx;
300
+#endif /* HAVE_ALIGNED_STACK */
301
+                }
302
+            }
303
+        }
304
+    } else if (bit_depth == 10) {
305
+        if (mm_flags & AV_CPU_FLAG_MMX) {
306
+            if (mm_flags & AV_CPU_FLAG_MMXEXT) {
307
+#if ARCH_X86_32
308
+                c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_mmx2;
309
+                c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmx2;
310
+                c->h264_v_loop_filter_luma         = ff_deblock_v_luma_10_mmx2;
311
+                c->h264_h_loop_filter_luma         = ff_deblock_h_luma_10_mmx2;
312
+                c->h264_v_loop_filter_luma_intra   = ff_deblock_v_luma_intra_10_mmx2;
313
+                c->h264_h_loop_filter_luma_intra   = ff_deblock_h_luma_intra_10_mmx2;
314
+#endif /* ARCH_X86_32 */
315
+                c->h264_idct_dc_add = ff_h264_idct_dc_add_10_mmx2;
316
+                if (mm_flags & AV_CPU_FLAG_SSE2) {
317
+                    c->h264_idct_add     = ff_h264_idct_add_10_sse2;
318
+                    c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2;
319
+
320
+                    c->h264_idct_add16 = ff_h264_idct_add16_10_sse2;
321
+                    if (chroma_format_idc == 1)
322
+                        c->h264_idct_add8 = ff_h264_idct_add8_10_sse2;
323
+                    c->h264_idct_add16intra = ff_h264_idct_add16intra_10_sse2;
324
+#if HAVE_ALIGNED_STACK
325
+                    c->h264_idct8_add  = ff_h264_idct8_add_10_sse2;
326
+                    c->h264_idct8_add4 = ff_h264_idct8_add4_10_sse2;
327
+#endif /* HAVE_ALIGNED_STACK */
328
+
329
+                    c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse2;
330
+                    c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse2;
331
+                    c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse2;
332
+
333
+                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse2;
334
+                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse2;
335
+                    c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse2;
336
+
337
+                    c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_sse2;
338
+                    c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_sse2;
339
+#if HAVE_ALIGNED_STACK
340
+                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_10_sse2;
341
+                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_10_sse2;
342
+                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_sse2;
343
+                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2;
344
+#endif /* HAVE_ALIGNED_STACK */
345
+                }
346
+                if (mm_flags & AV_CPU_FLAG_SSE4) {
347
+                    c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4;
348
+                    c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4;
349
+                    c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4;
350
+
351
+                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse4;
352
+                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4;
353
+                    c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4;
354
+                }
355
+#if HAVE_AVX
356
+                if (mm_flags & AV_CPU_FLAG_AVX) {
357
+                    c->h264_idct_dc_add  =
358
+                    c->h264_idct_add     = ff_h264_idct_add_10_avx;
359
+                    c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx;
360
+
361
+                    c->h264_idct_add16 = ff_h264_idct_add16_10_avx;
362
+                    if (chroma_format_idc == 1)
363
+                        c->h264_idct_add8 = ff_h264_idct_add8_10_avx;
364
+                    c->h264_idct_add16intra = ff_h264_idct_add16intra_10_avx;
365
+#if HAVE_ALIGNED_STACK
366
+                    c->h264_idct8_add  = ff_h264_idct8_add_10_avx;
367
+                    c->h264_idct8_add4 = ff_h264_idct8_add4_10_avx;
368
+#endif /* HAVE_ALIGNED_STACK */
369
+
370
+                    c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_avx;
371
+                    c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_avx;
372
+#if HAVE_ALIGNED_STACK
373
+                    c->h264_v_loop_filter_luma         = ff_deblock_v_luma_10_avx;
374
+                    c->h264_h_loop_filter_luma         = ff_deblock_h_luma_10_avx;
375
+                    c->h264_v_loop_filter_luma_intra   = ff_deblock_v_luma_intra_10_avx;
376
+                    c->h264_h_loop_filter_luma_intra   = ff_deblock_h_luma_intra_10_avx;
377
+#endif /* HAVE_ALIGNED_STACK */
378
+                }
379
+#endif /* HAVE_AVX */
380
+            }
381
+        }
382
+    }
383
+#endif /* HAVE_YASM */
384
+}
0 385
deleted file mode 100644
... ...
@@ -1,385 +0,0 @@
1
-/*
2
- * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
3
- *
4
- * This file is part of FFmpeg.
5
- *
6
- * FFmpeg is free software; you can redistribute it and/or
7
- * modify it under the terms of the GNU Lesser General Public
8
- * License as published by the Free Software Foundation; either
9
- * version 2.1 of the License, or (at your option) any later version.
10
- *
11
- * FFmpeg is distributed in the hope that it will be useful,
12
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
- * Lesser General Public License for more details.
15
- *
16
- * You should have received a copy of the GNU Lesser General Public
17
- * License along with FFmpeg; if not, write to the Free Software
18
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
- */
20
-
21
-#include "libavutil/cpu.h"
22
-#include "libavutil/x86/asm.h"
23
-#include "libavcodec/h264dsp.h"
24
-#include "dsputil_mmx.h"
25
-
26
-/***********************************/
27
-/* IDCT */
28
-#define IDCT_ADD_FUNC(NUM, DEPTH, OPT)                                  \
29
-void ff_h264_idct ## NUM ## _add_ ## DEPTH ## _ ## OPT(uint8_t *dst,    \
30
-                                                       int16_t *block,  \
31
-                                                       int stride);
32
-
33
-IDCT_ADD_FUNC(, 8, mmx)
34
-IDCT_ADD_FUNC(, 10, sse2)
35
-IDCT_ADD_FUNC(_dc, 8, mmx2)
36
-IDCT_ADD_FUNC(_dc, 10, mmx2)
37
-IDCT_ADD_FUNC(8_dc, 8, mmx2)
38
-IDCT_ADD_FUNC(8_dc, 10, sse2)
39
-IDCT_ADD_FUNC(8, 8, mmx)
40
-IDCT_ADD_FUNC(8, 8, sse2)
41
-IDCT_ADD_FUNC(8, 10, sse2)
42
-#if HAVE_AVX
43
-IDCT_ADD_FUNC(, 10, avx)
44
-IDCT_ADD_FUNC(8_dc, 10, avx)
45
-IDCT_ADD_FUNC(8, 10, avx)
46
-#endif
47
-
48
-
49
-#define IDCT_ADD_REP_FUNC(NUM, REP, DEPTH, OPT)                         \
50
-void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT       \
51
-    (uint8_t *dst, const int *block_offset,                             \
52
-     DCTELEM *block, int stride, const uint8_t nnzc[6 * 8]);
53
-
54
-IDCT_ADD_REP_FUNC(8, 4, 8, mmx)
55
-IDCT_ADD_REP_FUNC(8, 4, 8, mmx2)
56
-IDCT_ADD_REP_FUNC(8, 4, 8, sse2)
57
-IDCT_ADD_REP_FUNC(8, 4, 10, sse2)
58
-IDCT_ADD_REP_FUNC(8, 4, 10, avx)
59
-IDCT_ADD_REP_FUNC(, 16, 8, mmx)
60
-IDCT_ADD_REP_FUNC(, 16, 8, mmx2)
61
-IDCT_ADD_REP_FUNC(, 16, 8, sse2)
62
-IDCT_ADD_REP_FUNC(, 16, 10, sse2)
63
-IDCT_ADD_REP_FUNC(, 16intra, 8, mmx)
64
-IDCT_ADD_REP_FUNC(, 16intra, 8, mmx2)
65
-IDCT_ADD_REP_FUNC(, 16intra, 8, sse2)
66
-IDCT_ADD_REP_FUNC(, 16intra, 10, sse2)
67
-#if HAVE_AVX
68
-IDCT_ADD_REP_FUNC(, 16, 10, avx)
69
-IDCT_ADD_REP_FUNC(, 16intra, 10, avx)
70
-#endif
71
-
72
-
73
-#define IDCT_ADD_REP_FUNC2(NUM, REP, DEPTH, OPT)                      \
74
-void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT     \
75
-    (uint8_t **dst, const int *block_offset,                          \
76
-     DCTELEM *block, int stride, const uint8_t nnzc[6 * 8]);
77
-
78
-IDCT_ADD_REP_FUNC2(, 8, 8, mmx)
79
-IDCT_ADD_REP_FUNC2(, 8, 8, mmx2)
80
-IDCT_ADD_REP_FUNC2(, 8, 8, sse2)
81
-IDCT_ADD_REP_FUNC2(, 8, 10, sse2)
82
-#if HAVE_AVX
83
-IDCT_ADD_REP_FUNC2(, 8, 10, avx)
84
-#endif
85
-
86
-void ff_h264_luma_dc_dequant_idct_mmx(DCTELEM *output, DCTELEM *input, int qmul);
87
-void ff_h264_luma_dc_dequant_idct_sse2(DCTELEM *output, DCTELEM *input, int qmul);
88
-
89
-/***********************************/
90
-/* deblocking */
91
-
92
-void ff_h264_loop_filter_strength_mmx2(int16_t bS[2][4][4], uint8_t nnz[40],
93
-                                       int8_t ref[2][40], int16_t mv[2][40][2],
94
-                                       int bidir, int edges, int step,
95
-                                       int mask_mv0, int mask_mv1, int field);
96
-
97
-#define LF_FUNC(DIR, TYPE, DEPTH, OPT)                                        \
98
-void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT(uint8_t *pix,  \
99
-                                                               int stride,    \
100
-                                                               int alpha,     \
101
-                                                               int beta,      \
102
-                                                               int8_t *tc0);
103
-#define LF_IFUNC(DIR, TYPE, DEPTH, OPT) \
104
-void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT(uint8_t *pix,  \
105
-                                                               int stride,    \
106
-                                                               int alpha,     \
107
-                                                               int beta);
108
-
109
-#define LF_FUNCS(type, depth)                   \
110
-LF_FUNC(h,  chroma,       depth, mmx2)          \
111
-LF_IFUNC(h, chroma_intra, depth, mmx2)          \
112
-LF_FUNC(v,  chroma,       depth, mmx2)          \
113
-LF_IFUNC(v, chroma_intra, depth, mmx2)          \
114
-LF_FUNC(h,  luma,         depth, mmx2)          \
115
-LF_IFUNC(h, luma_intra,   depth, mmx2)          \
116
-LF_FUNC(h,  luma,         depth, sse2)          \
117
-LF_IFUNC(h, luma_intra,   depth, sse2)          \
118
-LF_FUNC(v,  luma,         depth, sse2)          \
119
-LF_IFUNC(v, luma_intra,   depth, sse2)          \
120
-LF_FUNC(h,  chroma,       depth, sse2)          \
121
-LF_IFUNC(h, chroma_intra, depth, sse2)          \
122
-LF_FUNC(v,  chroma,       depth, sse2)          \
123
-LF_IFUNC(v, chroma_intra, depth, sse2)          \
124
-LF_FUNC(h,  luma,         depth, avx)           \
125
-LF_IFUNC(h, luma_intra,   depth, avx)           \
126
-LF_FUNC(v,  luma,         depth, avx)           \
127
-LF_IFUNC(v, luma_intra,   depth, avx)           \
128
-LF_FUNC(h,  chroma,       depth, avx)           \
129
-LF_IFUNC(h, chroma_intra, depth, avx)           \
130
-LF_FUNC(v,  chroma,       depth, avx)           \
131
-LF_IFUNC(v, chroma_intra, depth, avx)
132
-
133
-LF_FUNCS(uint8_t,   8)
134
-LF_FUNCS(uint16_t, 10)
135
-
136
-#if ARCH_X86_32 && HAVE_YASM
137
-LF_FUNC(v8, luma, 8, mmx2)
138
-static void ff_deblock_v_luma_8_mmx2(uint8_t *pix, int stride, int alpha,
139
-                                     int beta, int8_t *tc0)
140
-{
141
-    if ((tc0[0] & tc0[1]) >= 0)
142
-        ff_deblock_v8_luma_8_mmx2(pix + 0, stride, alpha, beta, tc0);
143
-    if ((tc0[2] & tc0[3]) >= 0)
144
-        ff_deblock_v8_luma_8_mmx2(pix + 8, stride, alpha, beta, tc0 + 2);
145
-}
146
-
147
-LF_IFUNC(v8, luma_intra, 8, mmx2)
148
-static void ff_deblock_v_luma_intra_8_mmx2(uint8_t *pix, int stride,
149
-                                           int alpha, int beta)
150
-{
151
-    ff_deblock_v8_luma_intra_8_mmx2(pix + 0, stride, alpha, beta);
152
-    ff_deblock_v8_luma_intra_8_mmx2(pix + 8, stride, alpha, beta);
153
-}
154
-#endif /* ARCH_X86_32 */
155
-
156
-LF_FUNC(v,  luma,       10, mmx2)
157
-LF_IFUNC(v, luma_intra, 10, mmx2)
158
-
159
-/***********************************/
160
-/* weighted prediction */
161
-
162
-#define H264_WEIGHT(W, OPT)                                             \
163
-void ff_h264_weight_ ## W ## _ ## OPT(uint8_t *dst, int stride,         \
164
-                                      int height, int log2_denom,       \
165
-                                      int weight, int offset);
166
-
167
-#define H264_BIWEIGHT(W, OPT)                                           \
168
-void ff_h264_biweight_ ## W ## _ ## OPT(uint8_t *dst, uint8_t *src,     \
169
-                                        int stride, int height,         \
170
-                                        int log2_denom, int weightd,    \
171
-                                        int weights, int offset);
172
-
173
-#define H264_BIWEIGHT_MMX(W)                    \
174
-    H264_WEIGHT(W, mmx2)                        \
175
-    H264_BIWEIGHT(W, mmx2)
176
-
177
-#define H264_BIWEIGHT_MMX_SSE(W)                \
178
-    H264_BIWEIGHT_MMX(W)                        \
179
-    H264_WEIGHT(W, sse2)                        \
180
-    H264_BIWEIGHT(W, sse2)                      \
181
-    H264_BIWEIGHT(W, ssse3)
182
-
183
-H264_BIWEIGHT_MMX_SSE(16)
184
-H264_BIWEIGHT_MMX_SSE(8)
185
-H264_BIWEIGHT_MMX(4)
186
-
187
-#define H264_WEIGHT_10(W, DEPTH, OPT)                                   \
188
-void ff_h264_weight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst,       \
189
-                                                    int stride,         \
190
-                                                    int height,         \
191
-                                                    int log2_denom,     \
192
-                                                    int weight,         \
193
-                                                    int offset);
194
-
195
-#define H264_BIWEIGHT_10(W, DEPTH, OPT)                                 \
196
-void ff_h264_biweight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst,     \
197
-                                                      uint8_t *src,     \
198
-                                                      int stride,       \
199
-                                                      int height,       \
200
-                                                      int log2_denom,   \
201
-                                                      int weightd,      \
202
-                                                      int weights,      \
203
-                                                      int offset);
204
-
205
-#define H264_BIWEIGHT_10_SSE(W, DEPTH)          \
206
-    H264_WEIGHT_10(W, DEPTH, sse2)              \
207
-    H264_WEIGHT_10(W, DEPTH, sse4)              \
208
-    H264_BIWEIGHT_10(W, DEPTH, sse2)            \
209
-    H264_BIWEIGHT_10(W, DEPTH, sse4)
210
-
211
-H264_BIWEIGHT_10_SSE(16, 10)
212
-H264_BIWEIGHT_10_SSE(8,  10)
213
-H264_BIWEIGHT_10_SSE(4,  10)
214
-
215
-void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
216
-                         const int chroma_format_idc)
217
-{
218
-#if HAVE_YASM
219
-    int mm_flags = av_get_cpu_flags();
220
-
221
-    if (chroma_format_idc == 1 && mm_flags & AV_CPU_FLAG_MMXEXT)
222
-        c->h264_loop_filter_strength = ff_h264_loop_filter_strength_mmx2;
223
-
224
-    if (bit_depth == 8) {
225
-        if (mm_flags & AV_CPU_FLAG_MMX) {
226
-            c->h264_idct_dc_add   =
227
-            c->h264_idct_add      = ff_h264_idct_add_8_mmx;
228
-            c->h264_idct8_dc_add  =
229
-            c->h264_idct8_add     = ff_h264_idct8_add_8_mmx;
230
-
231
-            c->h264_idct_add16 = ff_h264_idct_add16_8_mmx;
232
-            c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx;
233
-            if (chroma_format_idc == 1)
234
-                c->h264_idct_add8 = ff_h264_idct_add8_8_mmx;
235
-            c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx;
236
-            if (mm_flags & AV_CPU_FLAG_CMOV)
237
-                c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx;
238
-
239
-            if (mm_flags & AV_CPU_FLAG_MMXEXT) {
240
-                c->h264_idct_dc_add  = ff_h264_idct_dc_add_8_mmx2;
241
-                c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmx2;
242
-                c->h264_idct_add16   = ff_h264_idct_add16_8_mmx2;
243
-                c->h264_idct8_add4   = ff_h264_idct8_add4_8_mmx2;
244
-                if (chroma_format_idc == 1)
245
-                    c->h264_idct_add8 = ff_h264_idct_add8_8_mmx2;
246
-                c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx2;
247
-
248
-                c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_8_mmx2;
249
-                c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_mmx2;
250
-                if (chroma_format_idc == 1) {
251
-                    c->h264_h_loop_filter_chroma       = ff_deblock_h_chroma_8_mmx2;
252
-                    c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_mmx2;
253
-                }
254
-#if ARCH_X86_32
255
-                c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_mmx2;
256
-                c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_mmx2;
257
-                c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmx2;
258
-                c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmx2;
259
-#endif /* ARCH_X86_32 */
260
-                c->weight_h264_pixels_tab[0] = ff_h264_weight_16_mmx2;
261
-                c->weight_h264_pixels_tab[1] = ff_h264_weight_8_mmx2;
262
-                c->weight_h264_pixels_tab[2] = ff_h264_weight_4_mmx2;
263
-
264
-                c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_mmx2;
265
-                c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_mmx2;
266
-                c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_mmx2;
267
-
268
-                if (mm_flags & AV_CPU_FLAG_SSE2) {
269
-                    c->h264_idct8_add  = ff_h264_idct8_add_8_sse2;
270
-
271
-                    c->h264_idct_add16 = ff_h264_idct_add16_8_sse2;
272
-                    c->h264_idct8_add4 = ff_h264_idct8_add4_8_sse2;
273
-                    if (chroma_format_idc == 1)
274
-                        c->h264_idct_add8 = ff_h264_idct_add8_8_sse2;
275
-                    c->h264_idct_add16intra      = ff_h264_idct_add16intra_8_sse2;
276
-                    c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_sse2;
277
-
278
-                    c->weight_h264_pixels_tab[0] = ff_h264_weight_16_sse2;
279
-                    c->weight_h264_pixels_tab[1] = ff_h264_weight_8_sse2;
280
-
281
-                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_sse2;
282
-                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_sse2;
283
-
284
-#if HAVE_ALIGNED_STACK
285
-                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_sse2;
286
-                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_sse2;
287
-                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2;
288
-                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
289
-#endif /* HAVE_ALIGNED_STACK */
290
-                }
291
-                if (mm_flags & AV_CPU_FLAG_SSSE3) {
292
-                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3;
293
-                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_ssse3;
294
-                }
295
-                if (HAVE_AVX && mm_flags & AV_CPU_FLAG_AVX) {
296
-#if HAVE_ALIGNED_STACK
297
-                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_avx;
298
-                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_avx;
299
-                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx;
300
-                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx;
301
-#endif /* HAVE_ALIGNED_STACK */
302
-                }
303
-            }
304
-        }
305
-    } else if (bit_depth == 10) {
306
-        if (mm_flags & AV_CPU_FLAG_MMX) {
307
-            if (mm_flags & AV_CPU_FLAG_MMXEXT) {
308
-#if ARCH_X86_32
309
-                c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_mmx2;
310
-                c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmx2;
311
-                c->h264_v_loop_filter_luma         = ff_deblock_v_luma_10_mmx2;
312
-                c->h264_h_loop_filter_luma         = ff_deblock_h_luma_10_mmx2;
313
-                c->h264_v_loop_filter_luma_intra   = ff_deblock_v_luma_intra_10_mmx2;
314
-                c->h264_h_loop_filter_luma_intra   = ff_deblock_h_luma_intra_10_mmx2;
315
-#endif /* ARCH_X86_32 */
316
-                c->h264_idct_dc_add = ff_h264_idct_dc_add_10_mmx2;
317
-                if (mm_flags & AV_CPU_FLAG_SSE2) {
318
-                    c->h264_idct_add     = ff_h264_idct_add_10_sse2;
319
-                    c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2;
320
-
321
-                    c->h264_idct_add16 = ff_h264_idct_add16_10_sse2;
322
-                    if (chroma_format_idc == 1)
323
-                        c->h264_idct_add8 = ff_h264_idct_add8_10_sse2;
324
-                    c->h264_idct_add16intra = ff_h264_idct_add16intra_10_sse2;
325
-#if HAVE_ALIGNED_STACK
326
-                    c->h264_idct8_add  = ff_h264_idct8_add_10_sse2;
327
-                    c->h264_idct8_add4 = ff_h264_idct8_add4_10_sse2;
328
-#endif /* HAVE_ALIGNED_STACK */
329
-
330
-                    c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse2;
331
-                    c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse2;
332
-                    c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse2;
333
-
334
-                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse2;
335
-                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse2;
336
-                    c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse2;
337
-
338
-                    c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_sse2;
339
-                    c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_sse2;
340
-#if HAVE_ALIGNED_STACK
341
-                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_10_sse2;
342
-                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_10_sse2;
343
-                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_sse2;
344
-                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2;
345
-#endif /* HAVE_ALIGNED_STACK */
346
-                }
347
-                if (mm_flags & AV_CPU_FLAG_SSE4) {
348
-                    c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4;
349
-                    c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4;
350
-                    c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4;
351
-
352
-                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse4;
353
-                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4;
354
-                    c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4;
355
-                }
356
-#if HAVE_AVX
357
-                if (mm_flags & AV_CPU_FLAG_AVX) {
358
-                    c->h264_idct_dc_add  =
359
-                    c->h264_idct_add     = ff_h264_idct_add_10_avx;
360
-                    c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx;
361
-
362
-                    c->h264_idct_add16 = ff_h264_idct_add16_10_avx;
363
-                    if (chroma_format_idc == 1)
364
-                        c->h264_idct_add8 = ff_h264_idct_add8_10_avx;
365
-                    c->h264_idct_add16intra = ff_h264_idct_add16intra_10_avx;
366
-#if HAVE_ALIGNED_STACK
367
-                    c->h264_idct8_add  = ff_h264_idct8_add_10_avx;
368
-                    c->h264_idct8_add4 = ff_h264_idct8_add4_10_avx;
369
-#endif /* HAVE_ALIGNED_STACK */
370
-
371
-                    c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_avx;
372
-                    c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_avx;
373
-#if HAVE_ALIGNED_STACK
374
-                    c->h264_v_loop_filter_luma         = ff_deblock_v_luma_10_avx;
375
-                    c->h264_h_loop_filter_luma         = ff_deblock_h_luma_10_avx;
376
-                    c->h264_v_loop_filter_luma_intra   = ff_deblock_v_luma_intra_10_avx;
377
-                    c->h264_h_loop_filter_luma_intra   = ff_deblock_h_luma_intra_10_avx;
378
-#endif /* HAVE_ALIGNED_STACK */
379
-                }
380
-#endif /* HAVE_AVX */
381
-            }
382
-        }
383
-    }
384
-#endif /* HAVE_YASM */
385
-}
... ...
@@ -360,13 +360,5 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
360 360
         block[0x3E] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
361 361
     }
362 362
     end:
363
-/*
364
-    for(i=0; i<last_non_zero_p1; i++)
365
-    {
366
-       int j= zigzag_direct_noperm[i];
367
-       block[block_permute_op(j)]= temp_block[j];
368
-    }
369
-*/
370
-
371 363
     return last_non_zero_p1 - 1;
372 364
 }
... ...
@@ -33,15 +33,6 @@
33 33
 #define HMAC_OPAD_VAL 0x5C
34 34
 
35 35
 /**
36
- * A non-zero transaction id requires the server to send back
37
- * a _result or _error response.
38
- * Setting it to 0 marks the message as a notification not
39
- * requiring feedback.
40
- */
41
-
42
-#define RTMP_NOTIFICATION 0
43
-
44
-/**
45 36
  * emulated Flash client version - 9.0.124.2 on Linux
46 37
  * @{
47 38
  */
... ...
@@ -91,7 +91,11 @@ typedef struct RTMPContext {
91 91
     int           nb_invokes;                 ///< keeps track of invoke messages
92 92
     char*         tcurl;                      ///< url of the target stream
93 93
     char*         flashver;                   ///< version of the flash plugin
94
+    char*         swfhash;                    ///< SHA256 hash of the decompressed SWF file (32 bytes)
95
+    int           swfhash_len;                ///< length of the SHA256 hash
96
+    int           swfsize;                    ///< size of the decompressed SWF file
94 97
     char*         swfurl;                     ///< url of the swf player
98
+    char          swfverification[42];        ///< hash of the SWF verification
95 99
     char*         pageurl;                    ///< url of the web page
96 100
     char*         subscribe;                  ///< name of live stream to subscribe
97 101
     int           server_bw;                  ///< server bandwidth
... ...
@@ -593,6 +597,27 @@ static int gen_pong(URLContext *s, RTMPContext *rt, RTMPPacket *ppkt)
593 593
 }
594 594
 
595 595
 /**
596
+ * Generate SWF verification message and send it to the server.
597
+ */
598
+static int gen_swf_verification(URLContext *s, RTMPContext *rt)
599
+{
600
+    RTMPPacket pkt;
601
+    uint8_t *p;
602
+    int ret;
603
+
604
+    av_log(s, AV_LOG_DEBUG, "Sending SWF verification...\n");
605
+    if ((ret = ff_rtmp_packet_create(&pkt, RTMP_NETWORK_CHANNEL, RTMP_PT_PING,
606
+                                     0, 44)) < 0)
607
+        return ret;
608
+
609
+    p = pkt.data;
610
+    bytestream_put_be16(&p, 27);
611
+    memcpy(p, rt->swfverification, 42);
612
+
613
+    return rtmp_send_packet(rt, &pkt, 0);
614
+}
615
+
616
+/**
596 617
  * Generate server bandwidth message and send it to the server.
597 618
  */
598 619
 static int gen_server_bw(URLContext *s, RTMPContext *rt)
... ...
@@ -626,10 +651,10 @@ static int gen_check_bw(URLContext *s, RTMPContext *rt)
626 626
 
627 627
     p = pkt.data;
628 628
     ff_amf_write_string(&p, "_checkbw");
629
-    ff_amf_write_number(&p, RTMP_NOTIFICATION);
629
+    ff_amf_write_number(&p, ++rt->nb_invokes);
630 630
     ff_amf_write_null(&p);
631 631
 
632
-    return rtmp_send_packet(rt, &pkt, 0);
632
+    return rtmp_send_packet(rt, &pkt, 1);
633 633
 }
634 634
 
635 635
 /**
... ...
@@ -776,6 +801,30 @@ static int rtmp_validate_digest(uint8_t *buf, int off)
776 776
     return 0;
777 777
 }
778 778
 
779
+static int rtmp_calc_swf_verification(URLContext *s, RTMPContext *rt,
780
+                                      uint8_t *buf)
781
+{
782
+    uint8_t *p;
783
+    int ret;
784
+
785
+    if (rt->swfhash_len != 32) {
786
+        av_log(s, AV_LOG_ERROR,
787
+               "Hash of the decompressed SWF file is not 32 bytes long.\n");
788
+        return AVERROR(EINVAL);
789
+    }
790
+
791
+    p = &rt->swfverification[0];
792
+    bytestream_put_byte(&p, 1);
793
+    bytestream_put_byte(&p, 1);
794
+    bytestream_put_be32(&p, rt->swfsize);
795
+    bytestream_put_be32(&p, rt->swfsize);
796
+
797
+    if ((ret = ff_rtmp_calc_digest(rt->swfhash, 32, 0, buf, 32, p)) < 0)
798
+        return ret;
799
+
800
+    return 0;
801
+}
802
+
779 803
 /**
780 804
  * Perform handshake with the server by means of exchanging pseudorandom data
781 805
  * signed with HMAC-SHA2 digest.
... ...
@@ -866,6 +915,14 @@ static int rtmp_handshake(URLContext *s, RTMPContext *rt)
866 866
             }
867 867
         }
868 868
 
869
+        /* Generate SWFVerification token (SHA256 HMAC hash of decompressed SWF,
870
+         * the key is the last 32 bytes of the server handshake). */
871
+        if (rt->swfsize) {
872
+            if ((ret = rtmp_calc_swf_verification(s, rt, serverdata + 1 +
873
+                                                  RTMP_HANDSHAKE_PACKET_SIZE - 32)) < 0)
874
+                return ret;
875
+        }
876
+
869 877
         ret = ff_rtmp_calc_digest(tosend + 1 + client_pos, 32, 0,
870 878
                                   rtmp_server_key, sizeof(rtmp_server_key),
871 879
                                   digest);
... ...
@@ -1001,6 +1058,13 @@ static int handle_ping(URLContext *s, RTMPPacket *pkt)
1001 1001
     if (t == 6) {
1002 1002
         if ((ret = gen_pong(s, rt, pkt)) < 0)
1003 1003
             return ret;
1004
+    } else if (t == 26) {
1005
+        if (rt->swfsize) {
1006
+            if ((ret = gen_swf_verification(s, rt)) < 0)
1007
+                return ret;
1008
+        } else {
1009
+            av_log(s, AV_LOG_WARNING, "Ignoring SWFVerification request.\n");
1010
+        }
1004 1011
     }
1005 1012
 
1006 1013
     return 0;
... ...
@@ -1055,15 +1119,27 @@ static int handle_server_bw(URLContext *s, RTMPPacket *pkt)
1055 1055
 static int handle_invoke_error(URLContext *s, RTMPPacket *pkt)
1056 1056
 {
1057 1057
     const uint8_t *data_end = pkt->data + pkt->data_size;
1058
+    char *tracked_method = NULL;
1059
+    int level = AV_LOG_ERROR;
1058 1060
     uint8_t tmpstr[256];
1061
+    int ret;
1062
+
1063
+    if ((ret = find_tracked_method(s, pkt, 9, &tracked_method)) < 0)
1064
+        return ret;
1059 1065
 
1060 1066
     if (!ff_amf_get_field_value(pkt->data + 9, data_end,
1061 1067
                                 "description", tmpstr, sizeof(tmpstr))) {
1062
-        av_log(s, AV_LOG_ERROR, "Server error: %s\n", tmpstr);
1063
-        return -1;
1068
+        if (tracked_method && !strcmp(tracked_method, "_checkbw")) {
1069
+            /* Ignore _checkbw errors. */
1070
+            level = AV_LOG_WARNING;
1071
+            ret = 0;
1072
+        } else
1073
+            ret = -1;
1074
+        av_log(s, level, "Server error: %s\n", tmpstr);
1064 1075
     }
1065 1076
 
1066
-    return 0;
1077
+    av_free(tracked_method);
1078
+    return ret;
1067 1079
 }
1068 1080
 
1069 1081
 static int handle_invoke_result(URLContext *s, RTMPPacket *pkt)
... ...
@@ -1705,6 +1781,8 @@ static const AVOption rtmp_options[] = {
1705 1705
     {"rtmp_pageurl", "URL of the web page in which the media was embedded. By default no value will be sent.", OFFSET(pageurl), AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, DEC},
1706 1706
     {"rtmp_playpath", "Stream identifier to play or to publish", OFFSET(playpath), AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, DEC|ENC},
1707 1707
     {"rtmp_subscribe", "Name of live stream to subscribe to. Defaults to rtmp_playpath.", OFFSET(subscribe), AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, DEC},
1708
+    {"rtmp_swfhash", "SHA256 hash of the decompressed SWF file (32 bytes).", OFFSET(swfhash), AV_OPT_TYPE_BINARY, .flags = DEC},
1709
+    {"rtmp_swfsize", "Size of the decompressed SWF file, required for SWFVerification.", OFFSET(swfsize), AV_OPT_TYPE_INT, {0}, 0, INT_MAX, DEC},
1708 1710
     {"rtmp_swfurl", "URL of the SWF player. By default no value will be sent", OFFSET(swfurl), AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, DEC|ENC},
1709 1711
     {"rtmp_tcurl", "URL of the target stream. Defaults to proto://host[:port]/app.", OFFSET(tcurl), AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, DEC|ENC},
1710 1712
     { NULL },
... ...
@@ -31,7 +31,7 @@
31 31
 
32 32
 #define LIBAVFORMAT_VERSION_MAJOR 54
33 33
 #define LIBAVFORMAT_VERSION_MINOR 23
34
-#define LIBAVFORMAT_VERSION_MICRO 100
34
+#define LIBAVFORMAT_VERSION_MICRO 101
35 35
 
36 36
 #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \
37 37
                                                LIBAVFORMAT_VERSION_MINOR, \
... ...
@@ -335,7 +335,9 @@ int ff_audio_mix_init(AVAudioResampleContext *avr)
335 335
                                       avr->out_channel_layout,
336 336
                                       avr->center_mix_level,
337 337
                                       avr->surround_mix_level,
338
-                                      avr->lfe_mix_level, 1, matrix_dbl,
338
+                                      avr->lfe_mix_level,
339
+                                      avr->normalize_mix_level,
340
+                                      matrix_dbl,
339 341
                                       avr->in_channels,
340 342
                                       avr->matrix_encoding);
341 343
         if (ret < 0) {
... ...
@@ -45,6 +45,7 @@ struct AVAudioResampleContext {
45 45
     double center_mix_level;                    /**< center mix level       */
46 46
     double surround_mix_level;                  /**< surround mix level     */
47 47
     double lfe_mix_level;                       /**< lfe mix level          */
48
+    int normalize_mix_level;                    /**< enable mix level normalization */
48 49
     int force_resampling;                       /**< force resampling       */
49 50
     int filter_size;                            /**< length of each FIR filter in the resampling filterbank relative to the cutoff frequency */
50 51
     int phase_shift;                            /**< log2 of the number of entries in the resampling polyphase filterbank */
... ...
@@ -47,6 +47,7 @@ static const AVOption options[] = {
47 47
     { "center_mix_level",       "Center Mix Level",         OFFSET(center_mix_level),       AV_OPT_TYPE_DOUBLE, { M_SQRT1_2             }, -32.0,                32.0,                   PARAM },
48 48
     { "surround_mix_level",     "Surround Mix Level",       OFFSET(surround_mix_level),     AV_OPT_TYPE_DOUBLE, { M_SQRT1_2             }, -32.0,                32.0,                   PARAM },
49 49
     { "lfe_mix_level",          "LFE Mix Level",            OFFSET(lfe_mix_level),          AV_OPT_TYPE_DOUBLE, { 0.0                   }, -32.0,                32.0,                   PARAM },
50
+    { "normalize_mix_level",    "Normalize Mix Level",      OFFSET(normalize_mix_level),    AV_OPT_TYPE_INT,    { 1                     }, 0,                    1,                      PARAM },
50 51
     { "force_resampling",       "Force Resampling",         OFFSET(force_resampling),       AV_OPT_TYPE_INT,    { 0                     }, 0,                    1,                      PARAM },
51 52
     { "filter_size",            "Resampling Filter Size",   OFFSET(filter_size),            AV_OPT_TYPE_INT,    { 16                    }, 0,                    32, /* ??? */           PARAM },
52 53
     { "phase_shift",            "Resampling Phase Shift",   OFFSET(phase_shift),            AV_OPT_TYPE_INT,    { 10                    }, 0,                    30, /* ??? */           PARAM },