Browse code

Merge remote-tracking branch 'qatar/master'

* qatar/master:
dwt: check malloc calls
ppc: Drop unused header regs.h
af_resample: remove an extra space in the log output
Convert vector_fmul range of functions to YASM and add AVX versions
lavfi: add an audio split filter
lavfi: rename vf_split.c to split.c

Conflicts:
doc/filters.texi
libavcodec/ppc/regs.h
libavfilter/Makefile
libavfilter/allfilters.c
libavfilter/f_split.c
libavfilter/split.c
libavfilter/version.h
libavfilter/vf_split.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>

Michael Niedermayer authored on 2012/05/23 06:40:58
Showing 19 changed files
... ...
@@ -32,6 +32,7 @@ version next:
32 32
 - accept + prefix to -pix_fmt option to disable automatic conversions.
33 33
 - audio filters support in libavfilter and avconv
34 34
 - add fps filter
35
+- audio split filter
35 36
 
36 37
 
37 38
 version 0.10:
... ...
@@ -322,6 +322,12 @@ outputs, like in:
322 322
 [in] asplit=3 [out0][out1][out2]
323 323
 @end example
324 324
 
325
+@example
326
+ffmpeg -i INPUT -filter_complex asplit=5 OUTPUT
327
+@end example
328
+will create 5 copies of the input audio.
329
+
330
+
325 331
 @section astreamsync
326 332
 
327 333
 Forward two audio streams and control the order the buffers are forwarded.
... ...
@@ -267,8 +267,8 @@ static const int8_t sbr_offset[6][16] = {
267 267
 };
268 268
 
269 269
 ///< window coefficients for analysis/synthesis QMF banks
270
-static DECLARE_ALIGNED(16, float, sbr_qmf_window_ds)[320];
271
-static DECLARE_ALIGNED(16, float, sbr_qmf_window_us)[640] = {
270
+static DECLARE_ALIGNED(32, float, sbr_qmf_window_ds)[320];
271
+static DECLARE_ALIGNED(32, float, sbr_qmf_window_us)[640] = {
272 272
      0.0000000000, -0.0005525286, -0.0005617692, -0.0004947518,
273 273
     -0.0004875227, -0.0004893791, -0.0005040714, -0.0005226564,
274 274
     -0.0005466565, -0.0005677802, -0.0005870930, -0.0006132747,
... ...
@@ -33,8 +33,8 @@
33 33
 
34 34
 #include <stdint.h>
35 35
 
36
-DECLARE_ALIGNED(16, float,  ff_aac_kbd_long_1024)[1024];
37
-DECLARE_ALIGNED(16, float,  ff_aac_kbd_short_128)[128];
36
+DECLARE_ALIGNED(32, float,  ff_aac_kbd_long_1024)[1024];
37
+DECLARE_ALIGNED(32, float,  ff_aac_kbd_short_128)[128];
38 38
 
39 39
 const uint8_t ff_aac_num_swb_1024[] = {
40 40
     41, 41, 47, 49, 49, 51, 47, 47, 43, 43, 43, 40, 40
... ...
@@ -44,8 +44,8 @@
44 44
 /* @name window coefficients
45 45
  * @{
46 46
  */
47
-DECLARE_ALIGNED(16, extern float,  ff_aac_kbd_long_1024)[1024];
48
-DECLARE_ALIGNED(16, extern float,  ff_aac_kbd_short_128)[128];
47
+DECLARE_ALIGNED(32, extern float,  ff_aac_kbd_long_1024)[1024];
48
+DECLARE_ALIGNED(32, extern float,  ff_aac_kbd_short_128)[128];
49 49
 // @}
50 50
 
51 51
 /* @name number of scalefactor window bands for long and short transform windows respectively
... ...
@@ -402,7 +402,7 @@ typedef struct DSPContext {
402 402
     /* assume len is a multiple of 4, and arrays are 16-byte aligned */
403 403
     void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
404 404
     void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);
405
-    /* assume len is a multiple of 8, and arrays are 16-byte aligned */
405
+    /* assume len is a multiple of 16, and arrays are 32-byte aligned */
406 406
     void (*vector_fmul)(float *dst, const float *src0, const float *src1, int len);
407 407
     void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len);
408 408
     /* assume len is a multiple of 8, and src arrays are 16-byte aligned */
... ...
@@ -35,10 +35,24 @@ void ff_slice_buffer_init(slice_buffer *buf, int line_count,
35 35
     buf->line_width  = line_width;
36 36
     buf->data_count  = max_allocated_lines;
37 37
     buf->line        = av_mallocz(sizeof(IDWTELEM *) * line_count);
38
+    if (!buf->line)
39
+        return AVERROR(ENOMEM);
38 40
     buf->data_stack  = av_malloc(sizeof(IDWTELEM *) * max_allocated_lines);
41
+    if (!buf->data_stack) {
42
+        av_free(buf->line);
43
+        return AVERROR(ENOMEM);
44
+    }
39 45
 
40
-    for (i = 0; i < max_allocated_lines; i++)
46
+    for (i = 0; i < max_allocated_lines; i++) {
41 47
         buf->data_stack[i] = av_malloc(sizeof(IDWTELEM) * line_width);
48
+        if (!buf->data_stack[i]) {
49
+            for (i--; i >=0; i--)
50
+                av_free(buf->data_stack[i]);
51
+            av_free(buf->data_stack);
52
+            av_free(buf->line);
53
+            return AVERROR(ENOMEM);
54
+        }
55
+    }
42 56
 
43 57
     buf->data_stack_top = max_allocated_lines - 1;
44 58
 }
45 59
deleted file mode 100644
... ...
@@ -1,37 +0,0 @@
1
-/*
2
- * Copyright (c) 2010 Mans Rullgard
3
- *
4
- * This file is part of FFmpeg.
5
- *
6
- * FFmpeg is free software; you can redistribute it and/or
7
- * modify it under the terms of the GNU Lesser General Public
8
- * License as published by the Free Software Foundation; either
9
- * version 2.1 of the License, or (at your option) any later version.
10
- *
11
- * FFmpeg is distributed in the hope that it will be useful,
12
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
- * Lesser General Public License for more details.
15
- *
16
- * You should have received a copy of the GNU Lesser General Public
17
- * License along with FFmpeg; if not, write to the Free Software
18
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
- */
20
-
21
-#ifndef AVCODEC_PPC_REGS_H
22
-#define AVCODEC_PPC_REGS_H
23
-
24
-#include "libavutil/avutil.h"
25
-#include "config.h"
26
-
27
-#if HAVE_IBM_ASM
28
-#   define r(n) AV_TOSTRING(n)
29
-#   define f(n) AV_TOSTRING(n)
30
-#   define v(n) AV_TOSTRING(n)
31
-#else
32
-#   define r(n) AV_TOSTRING(r ## n)
33
-#   define f(n) AV_TOSTRING(f ## n)
34
-#   define v(n) AV_TOSTRING(v ## n)
35
-#endif
36
-
37
-#endif /* AVCODEC_PPC_REGS_H */
... ...
@@ -38,8 +38,8 @@
38 38
 typedef struct {
39 39
     AVFrame frame;
40 40
     DSPContext dsp;
41
-    DECLARE_ALIGNED(16, float,   sp_lpc)[FFALIGN(36, 8)];   ///< LPC coefficients for speech data (spec: A)
42
-    DECLARE_ALIGNED(16, float, gain_lpc)[FFALIGN(10, 8)];   ///< LPC coefficients for gain        (spec: GB)
41
+    DECLARE_ALIGNED(32, float,   sp_lpc)[FFALIGN(36, 16)];   ///< LPC coefficients for speech data (spec: A)
42
+    DECLARE_ALIGNED(32, float, gain_lpc)[FFALIGN(10, 16)];   ///< LPC coefficients for gain        (spec: GB)
43 43
 
44 44
     /** speech data history                                      (spec: SB).
45 45
      *  Its first 70 coefficients are updated only at backward filtering.
... ...
@@ -133,11 +133,11 @@ static void do_hybrid_window(RA288Context *ractx,
133 133
     int i;
134 134
     float buffer1[MAX_BACKWARD_FILTER_ORDER + 1];
135 135
     float buffer2[MAX_BACKWARD_FILTER_ORDER + 1];
136
-    LOCAL_ALIGNED_16(float, work, [FFALIGN(MAX_BACKWARD_FILTER_ORDER +
137
-                                           MAX_BACKWARD_FILTER_LEN   +
138
-                                           MAX_BACKWARD_FILTER_NONREC, 8)]);
136
+    LOCAL_ALIGNED(32, float, work, [FFALIGN(MAX_BACKWARD_FILTER_ORDER +
137
+                                            MAX_BACKWARD_FILTER_LEN   +
138
+                                            MAX_BACKWARD_FILTER_NONREC, 16)]);
139 139
 
140
-    ractx->dsp.vector_fmul(work, window, hist, FFALIGN(order + n + non_rec, 8));
140
+    ractx->dsp.vector_fmul(work, window, hist, FFALIGN(order + n + non_rec, 16));
141 141
 
142 142
     convolve(buffer1, work + order    , n      , order);
143 143
     convolve(buffer2, work + order + n, non_rec, order);
... ...
@@ -164,7 +164,7 @@ static void backward_filter(RA288Context *ractx,
164 164
     do_hybrid_window(ractx, order, n, non_rec, temp, hist, rec, window);
165 165
 
166 166
     if (!compute_lpc_coefs(temp, order, lpc, 0, 1, 1))
167
-        ractx->dsp.vector_fmul(lpc, lpc, tab, FFALIGN(order, 8));
167
+        ractx->dsp.vector_fmul(lpc, lpc, tab, FFALIGN(order, 16));
168 168
 
169 169
     memmove(hist, hist + n, move_size*sizeof(*hist));
170 170
 }
... ...
@@ -97,7 +97,7 @@ static const int16_t codetable[128][5]={
97 97
     {  3746,  -606,    53,  -269, -3301}, {   606,  2018, -1316,  4064,   398}
98 98
 };
99 99
 
100
-DECLARE_ALIGNED(16, static const float, syn_window)[FFALIGN(111, 8)]={
100
+DECLARE_ALIGNED(32, static const float, syn_window)[FFALIGN(111, 16)]={
101 101
   0.576690972, 0.580838025, 0.585013986, 0.589219987, 0.59345597,  0.597723007,
102 102
   0.602020264, 0.606384277, 0.610748291, 0.615142822, 0.619598389, 0.624084473,
103 103
   0.628570557, 0.633117676, 0.637695313, 0.642272949, 0.646911621, 0.651580811,
... ...
@@ -119,7 +119,7 @@ DECLARE_ALIGNED(16, static const float, syn_window)[FFALIGN(111, 8)]={
119 119
   0.142852783, 0.0954284668,0.0477600098
120 120
 };
121 121
 
122
-DECLARE_ALIGNED(16, static const float, gain_window)[FFALIGN(38, 8)]={
122
+DECLARE_ALIGNED(32, static const float, gain_window)[FFALIGN(38, 16)]={
123 123
   0.505699992, 0.524200022, 0.54339999,  0.563300014, 0.583953857, 0.60534668,
124 124
   0.627502441, 0.650482178, 0.674316406, 0.699005127, 0.724578857, 0.75112915,
125 125
   0.778625488, 0.807128906, 0.836669922, 0.86730957,  0.899078369, 0.932006836,
... ...
@@ -130,7 +130,7 @@ DECLARE_ALIGNED(16, static const float, gain_window)[FFALIGN(38, 8)]={
130 130
 };
131 131
 
132 132
 /** synthesis bandwidth broadening table */
133
-DECLARE_ALIGNED(16, static const float, syn_bw_tab)[FFALIGN(36, 8)] = {
133
+DECLARE_ALIGNED(32, static const float, syn_bw_tab)[FFALIGN(36, 16)] = {
134 134
   0.98828125,  0.976699829, 0.965254128, 0.953942537, 0.942763507, 0.931715488,
135 135
   0.920796931, 0.910006344, 0.899342179, 0.888803005, 0.878387332, 0.868093729,
136 136
   0.857920766, 0.847867012, 0.837931097, 0.828111589, 0.818407178, 0.808816493,
... ...
@@ -140,7 +140,7 @@ DECLARE_ALIGNED(16, static const float, syn_bw_tab)[FFALIGN(36, 8)] = {
140 140
 };
141 141
 
142 142
 /** gain bandwidth broadening table */
143
-DECLARE_ALIGNED(16, static const float, gain_bw_tab)[FFALIGN(10, 8)] = {
143
+DECLARE_ALIGNED(32, static const float, gain_bw_tab)[FFALIGN(10, 16)] = {
144 144
   0.90625,     0.821289063, 0.74432373,  0.674499512, 0.61126709,
145 145
   0.553955078, 0.50201416,  0.454956055, 0.41229248,  0.373657227
146 146
 };
... ...
@@ -78,8 +78,8 @@ typedef struct {
78 78
      * @name State variables
79 79
      * @{
80 80
      */
81
-    DECLARE_ALIGNED(16, float, synthesis_filterbank_samples)[SBR_SYNTHESIS_BUF_SIZE];
82
-    DECLARE_ALIGNED(16, float, analysis_filterbank_samples) [1312];
81
+    DECLARE_ALIGNED(32, float, synthesis_filterbank_samples)[SBR_SYNTHESIS_BUF_SIZE];
82
+    DECLARE_ALIGNED(32, float, analysis_filterbank_samples) [1312];
83 83
     int                synthesis_filterbank_samples_offset;
84 84
     ///l_APrev and l_A
85 85
     int                e_a[2];
... ...
@@ -31,7 +31,7 @@
31 31
 #endif
32 32
 
33 33
 #define SINETABLE(size) \
34
-    SINETABLE_CONST DECLARE_ALIGNED(16, float, ff_sine_##size)[size]
34
+    SINETABLE_CONST DECLARE_ALIGNED(32, float, ff_sine_##size)[size]
35 35
 
36 36
 /**
37 37
  * Generate a sine window.
... ...
@@ -2427,135 +2427,6 @@ static void ac3_downmix_sse(float (*samples)[256], float (*matrix)[2],
2427 2427
     }
2428 2428
 }
2429 2429
 
2430
-static void vector_fmul_3dnow(float *dst, const float *src0, const float *src1,
2431
-                              int len)
2432
-{
2433
-    x86_reg i = (len - 4) * 4;
2434
-    __asm__ volatile (
2435
-        "1:                             \n\t"
2436
-        "movq    (%2, %0), %%mm0        \n\t"
2437
-        "movq   8(%2, %0), %%mm1        \n\t"
2438
-        "pfmul   (%3, %0), %%mm0        \n\t"
2439
-        "pfmul  8(%3, %0), %%mm1        \n\t"
2440
-        "movq       %%mm0,  (%1, %0)    \n\t"
2441
-        "movq       %%mm1, 8(%1, %0)    \n\t"
2442
-        "sub          $16, %0           \n\t"
2443
-        "jge           1b               \n\t"
2444
-        "femms                          \n\t"
2445
-        : "+r"(i)
2446
-        : "r"(dst), "r"(src0), "r"(src1)
2447
-        : "memory"
2448
-    );
2449
-}
2450
-
2451
-static void vector_fmul_sse(float *dst, const float *src0, const float *src1,
2452
-                            int len)
2453
-{
2454
-    x86_reg i = (len - 8) * 4;
2455
-    __asm__ volatile (
2456
-        "1:                             \n\t"
2457
-        "movaps    (%2, %0), %%xmm0     \n\t"
2458
-        "movaps  16(%2, %0), %%xmm1     \n\t"
2459
-        "mulps     (%3, %0), %%xmm0     \n\t"
2460
-        "mulps   16(%3, %0), %%xmm1     \n\t"
2461
-        "movaps      %%xmm0,   (%1, %0) \n\t"
2462
-        "movaps      %%xmm1, 16(%1, %0) \n\t"
2463
-        "sub            $32, %0         \n\t"
2464
-        "jge             1b             \n\t"
2465
-        : "+r"(i)
2466
-        : "r"(dst), "r"(src0), "r"(src1)
2467
-        : "memory"
2468
-    );
2469
-}
2470
-
2471
-static void vector_fmul_reverse_3dnow2(float *dst, const float *src0,
2472
-                                       const float *src1, int len)
2473
-{
2474
-    x86_reg i = len * 4 - 16;
2475
-    __asm__ volatile (
2476
-        "1:                             \n\t"
2477
-        "pswapd     8(%1), %%mm0        \n\t"
2478
-        "pswapd      (%1), %%mm1        \n\t"
2479
-        "pfmul   (%3, %0), %%mm0        \n\t"
2480
-        "pfmul  8(%3, %0), %%mm1        \n\t"
2481
-        "movq       %%mm0,  (%2, %0)    \n\t"
2482
-        "movq       %%mm1, 8(%2, %0)    \n\t"
2483
-        "add          $16, %1           \n\t"
2484
-        "sub          $16, %0           \n\t"
2485
-        "jge           1b               \n\t"
2486
-        : "+r"(i), "+r"(src1)
2487
-        : "r"(dst), "r"(src0)
2488
-    );
2489
-    __asm__ volatile ("femms");
2490
-}
2491
-
2492
-static void vector_fmul_reverse_sse(float *dst, const float *src0,
2493
-                                    const float *src1, int len)
2494
-{
2495
-    x86_reg i = len * 4 - 32;
2496
-    __asm__ volatile (
2497
-        "1:                                 \n\t"
2498
-        "movaps         16(%1), %%xmm0      \n\t"
2499
-        "movaps           (%1), %%xmm1      \n\t"
2500
-        "shufps  $0x1b, %%xmm0, %%xmm0      \n\t"
2501
-        "shufps  $0x1b, %%xmm1, %%xmm1      \n\t"
2502
-        "mulps        (%3, %0), %%xmm0      \n\t"
2503
-        "mulps      16(%3, %0), %%xmm1      \n\t"
2504
-        "movaps         %%xmm0,   (%2, %0)  \n\t"
2505
-        "movaps         %%xmm1, 16(%2, %0)  \n\t"
2506
-        "add               $32, %1          \n\t"
2507
-        "sub               $32, %0          \n\t"
2508
-        "jge                1b              \n\t"
2509
-        : "+r"(i), "+r"(src1)
2510
-        : "r"(dst), "r"(src0)
2511
-    );
2512
-}
2513
-
2514
-static void vector_fmul_add_3dnow(float *dst, const float *src0,
2515
-                                  const float *src1, const float *src2, int len)
2516
-{
2517
-    x86_reg i = (len - 4) * 4;
2518
-    __asm__ volatile (
2519
-        "1:                             \n\t"
2520
-        "movq   (%2, %0), %%mm0         \n\t"
2521
-        "movq  8(%2, %0), %%mm1         \n\t"
2522
-        "pfmul  (%3, %0), %%mm0         \n\t"
2523
-        "pfmul 8(%3, %0), %%mm1         \n\t"
2524
-        "pfadd  (%4, %0), %%mm0         \n\t"
2525
-        "pfadd 8(%4, %0), %%mm1         \n\t"
2526
-        "movq      %%mm0,  (%1, %0)     \n\t"
2527
-        "movq      %%mm1, 8(%1, %0)     \n\t"
2528
-        "sub         $16, %0            \n\t"
2529
-        "jge          1b                \n\t"
2530
-        : "+r"(i)
2531
-        : "r"(dst), "r"(src0), "r"(src1), "r"(src2)
2532
-        : "memory"
2533
-    );
2534
-    __asm__ volatile ("femms");
2535
-}
2536
-
2537
-static void vector_fmul_add_sse(float *dst, const float *src0,
2538
-                                const float *src1, const float *src2, int len)
2539
-{
2540
-    x86_reg i = (len - 8) * 4;
2541
-    __asm__ volatile (
2542
-        "1:                             \n\t"
2543
-        "movaps   (%2, %0), %%xmm0      \n\t"
2544
-        "movaps 16(%2, %0), %%xmm1      \n\t"
2545
-        "mulps    (%3, %0), %%xmm0      \n\t"
2546
-        "mulps  16(%3, %0), %%xmm1      \n\t"
2547
-        "addps    (%4, %0), %%xmm0      \n\t"
2548
-        "addps  16(%4, %0), %%xmm1      \n\t"
2549
-        "movaps     %%xmm0,   (%1, %0)  \n\t"
2550
-        "movaps     %%xmm1, 16(%1, %0)  \n\t"
2551
-        "sub           $32, %0          \n\t"
2552
-        "jge            1b              \n\t"
2553
-        : "+r"(i)
2554
-        : "r"(dst), "r"(src0), "r"(src1), "r"(src2)
2555
-        : "memory"
2556
-    );
2557
-}
2558
-
2559 2430
 #if HAVE_6REGS
2560 2431
 static void vector_fmul_window_3dnow2(float *dst, const float *src0,
2561 2432
                                       const float *src1, const float *win,
... ...
@@ -2710,6 +2581,21 @@ int  ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src,
2710 2710
 
2711 2711
 float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
2712 2712
 
2713
+void ff_vector_fmul_sse(float *dst, const float *src0, const float *src1,
2714
+                        int len);
2715
+void ff_vector_fmul_avx(float *dst, const float *src0, const float *src1,
2716
+                        int len);
2717
+
2718
+void ff_vector_fmul_reverse_sse(float *dst, const float *src0,
2719
+                                const float *src1, int len);
2720
+void ff_vector_fmul_reverse_avx(float *dst, const float *src0,
2721
+                                const float *src1, int len);
2722
+
2723
+void ff_vector_fmul_add_sse(float *dst, const float *src0, const float *src1,
2724
+                            const float *src2, int len);
2725
+void ff_vector_fmul_add_avx(float *dst, const float *src0, const float *src1,
2726
+                            const float *src2, int len);
2727
+
2713 2728
 void ff_vector_clip_int32_mmx     (int32_t *dst, const int32_t *src,
2714 2729
                                    int32_t min, int32_t max, unsigned int len);
2715 2730
 void ff_vector_clip_int32_sse2    (int32_t *dst, const int32_t *src,
... ...
@@ -3000,8 +2886,6 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
3000 3000
 #endif
3001 3001
 
3002 3002
     c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
3003
-    c->vector_fmul             = vector_fmul_3dnow;
3004
-    c->vector_fmul_add         = vector_fmul_add_3dnow;
3005 3003
 
3006 3004
 #if HAVE_7REGS
3007 3005
     c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov;
... ...
@@ -3011,7 +2895,6 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
3011 3011
 static void dsputil_init_3dnow2(DSPContext *c, AVCodecContext *avctx,
3012 3012
                                 int mm_flags)
3013 3013
 {
3014
-    c->vector_fmul_reverse = vector_fmul_reverse_3dnow2;
3015 3014
 #if HAVE_6REGS
3016 3015
     c->vector_fmul_window  = vector_fmul_window_3dnow2;
3017 3016
 #endif
... ...
@@ -3031,11 +2914,11 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags)
3031 3031
 
3032 3032
     c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
3033 3033
     c->ac3_downmix             = ac3_downmix_sse;
3034
-    c->vector_fmul             = vector_fmul_sse;
3035
-    c->vector_fmul_reverse     = vector_fmul_reverse_sse;
3036
-
3037
-    if (!(mm_flags & AV_CPU_FLAG_3DNOW))
3038
-        c->vector_fmul_add = vector_fmul_add_sse;
3034
+#if HAVE_YASM
3035
+    c->vector_fmul         = ff_vector_fmul_sse;
3036
+    c->vector_fmul_reverse = ff_vector_fmul_reverse_sse;
3037
+    c->vector_fmul_add     = ff_vector_fmul_add_sse;
3038
+#endif
3039 3039
 
3040 3040
 #if HAVE_6REGS
3041 3041
     c->vector_fmul_window = vector_fmul_window_sse;
... ...
@@ -3194,6 +3077,9 @@ static void dsputil_init_avx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
3194 3194
         }
3195 3195
     }
3196 3196
     c->butterflies_float_interleave = ff_butterflies_float_interleave_avx;
3197
+    c->vector_fmul = ff_vector_fmul_avx;
3198
+    c->vector_fmul_reverse = ff_vector_fmul_reverse_avx;
3199
+    c->vector_fmul_add = ff_vector_fmul_add_avx;
3197 3200
 #endif
3198 3201
 }
3199 3202
 
... ...
@@ -1130,6 +1130,111 @@ VECTOR_CLIP_INT32 6, 1, 0, 0
1130 1130
 %endif
1131 1131
 
1132 1132
 ;-----------------------------------------------------------------------------
1133
+; void vector_fmul(float *dst, const float *src0, const float *src1, int len)
1134
+;-----------------------------------------------------------------------------
1135
+%macro VECTOR_FMUL 0
1136
+cglobal vector_fmul, 4,4,2, dst, src0, src1, len
1137
+    lea       lenq, [lend*4 - 2*mmsize]
1138
+ALIGN 16
1139
+.loop
1140
+    mova      m0,   [src0q + lenq]
1141
+    mova      m1,   [src0q + lenq + mmsize]
1142
+    mulps     m0, m0, [src1q + lenq]
1143
+    mulps     m1, m1, [src1q + lenq + mmsize]
1144
+    mova      [dstq + lenq], m0
1145
+    mova      [dstq + lenq + mmsize], m1
1146
+
1147
+    sub       lenq, 2*mmsize
1148
+    jge       .loop
1149
+%if mmsize == 32
1150
+    vzeroupper
1151
+    RET
1152
+%else
1153
+    REP_RET
1154
+%endif
1155
+%endmacro
1156
+
1157
+INIT_XMM sse
1158
+VECTOR_FMUL
1159
+INIT_YMM avx
1160
+VECTOR_FMUL
1161
+
1162
+;-----------------------------------------------------------------------------
1163
+; void vector_fmul_reverse(float *dst, const float *src0, const float *src1,
1164
+;                          int len)
1165
+;-----------------------------------------------------------------------------
1166
+%macro VECTOR_FMUL_REVERSE 0
1167
+cglobal vector_fmul_reverse, 4,4,2, dst, src0, src1, len
1168
+    lea       lenq, [lend*4 - 2*mmsize]
1169
+ALIGN 16
1170
+.loop
1171
+%if cpuflag(avx)
1172
+    vmovaps     xmm0, [src1q + 16]
1173
+    vinsertf128 m0, m0, [src1q], 1
1174
+    vshufps     m0, m0, m0, q0123
1175
+    vmovaps     xmm1, [src1q + mmsize + 16]
1176
+    vinsertf128 m1, m1, [src1q + mmsize], 1
1177
+    vshufps     m1, m1, m1, q0123
1178
+%else
1179
+    mova    m0, [src1q]
1180
+    mova    m1, [src1q + mmsize]
1181
+    shufps  m0, m0, q0123
1182
+    shufps  m1, m1, q0123
1183
+%endif
1184
+    mulps   m0, m0, [src0q + lenq + mmsize]
1185
+    mulps   m1, m1, [src0q + lenq]
1186
+    mova    [dstq + lenq + mmsize], m0
1187
+    mova    [dstq + lenq], m1
1188
+    add     src1q, 2*mmsize
1189
+    sub     lenq,  2*mmsize
1190
+    jge     .loop
1191
+%if mmsize == 32
1192
+    vzeroupper
1193
+    RET
1194
+%else
1195
+    REP_RET
1196
+%endif
1197
+%endmacro
1198
+
1199
+INIT_XMM sse
1200
+VECTOR_FMUL_REVERSE
1201
+INIT_YMM avx
1202
+VECTOR_FMUL_REVERSE
1203
+
1204
+;-----------------------------------------------------------------------------
1205
+; vector_fmul_add(float *dst, const float *src0, const float *src1,
1206
+;                 const float *src2, int len)
1207
+;-----------------------------------------------------------------------------
1208
+%macro VECTOR_FMUL_ADD 0
1209
+cglobal vector_fmul_add, 5,5,2, dst, src0, src1, src2, len
1210
+    lea       lenq, [lend*4 - 2*mmsize]
1211
+ALIGN 16
1212
+.loop
1213
+    mova    m0,   [src0q + lenq]
1214
+    mova    m1,   [src0q + lenq + mmsize]
1215
+    mulps   m0, m0, [src1q + lenq]
1216
+    mulps   m1, m1, [src1q + lenq + mmsize]
1217
+    addps   m0, m0, [src2q + lenq]
1218
+    addps   m1, m1, [src2q + lenq + mmsize]
1219
+    mova    [dstq + lenq], m0
1220
+    mova    [dstq + lenq + mmsize], m1
1221
+
1222
+    sub     lenq,   2*mmsize
1223
+    jge     .loop
1224
+%if mmsize == 32
1225
+    vzeroupper
1226
+    RET
1227
+%else
1228
+    REP_RET
1229
+%endif
1230
+%endmacro
1231
+
1232
+INIT_XMM sse
1233
+VECTOR_FMUL_ADD
1234
+INIT_YMM avx
1235
+VECTOR_FMUL_ADD
1236
+
1237
+;-----------------------------------------------------------------------------
1133 1238
 ; void ff_butterflies_float_interleave(float *dst, const float *src0,
1134 1239
 ;                                      const float *src1, int len);
1135 1240
 ;-----------------------------------------------------------------------------
... ...
@@ -50,7 +50,7 @@ OBJS-$(CONFIG_AMERGE_FILTER)                 += af_amerge.o
50 50
 OBJS-$(CONFIG_ANULL_FILTER)                  += af_anull.o
51 51
 OBJS-$(CONFIG_ARESAMPLE_FILTER)              += af_aresample.o
52 52
 OBJS-$(CONFIG_ASHOWINFO_FILTER)              += af_ashowinfo.o
53
-OBJS-$(CONFIG_ASPLIT_FILTER)                 += f_split.o
53
+OBJS-$(CONFIG_ASPLIT_FILTER)                 += split.o
54 54
 OBJS-$(CONFIG_ASTREAMSYNC_FILTER)            += af_astreamsync.o
55 55
 OBJS-$(CONFIG_ASYNCTS_FILTER)                += af_asyncts.o
56 56
 OBJS-$(CONFIG_EARWAX_FILTER)                 += af_earwax.o
... ...
@@ -109,7 +109,7 @@ OBJS-$(CONFIG_SETSAR_FILTER)                 += vf_aspect.o
109 109
 OBJS-$(CONFIG_SETTB_FILTER)                  += vf_settb.o
110 110
 OBJS-$(CONFIG_SHOWINFO_FILTER)               += vf_showinfo.o
111 111
 OBJS-$(CONFIG_SLICIFY_FILTER)                += vf_slicify.o
112
-OBJS-$(CONFIG_SPLIT_FILTER)                  += f_split.o
112
+OBJS-$(CONFIG_SPLIT_FILTER)                  += split.o
113 113
 OBJS-$(CONFIG_SUPER2XSAI_FILTER)             += vf_super2xsai.o
114 114
 OBJS-$(CONFIG_SWAPUV_FILTER)                 += vf_swapuv.o
115 115
 OBJS-$(CONFIG_THUMBNAIL_FILTER)              += vf_thumbnail.o
... ...
@@ -119,7 +119,7 @@ static int config_output(AVFilterLink *outlink)
119 119
     av_get_channel_layout_string(buf2, sizeof(buf2),
120 120
                                  -1, outlink->channel_layout);
121 121
     av_log(ctx, AV_LOG_VERBOSE,
122
-           "fmt:%s srate: %d cl:%s -> fmt:%s srate: %d cl:%s\n",
122
+           "fmt:%s srate:%d cl:%s -> fmt:%s srate:%d cl:%s\n",
123 123
            av_get_sample_fmt_name(inlink ->format), inlink ->sample_rate, buf1,
124 124
            av_get_sample_fmt_name(outlink->format), outlink->sample_rate, buf2);
125 125
 
126 126
deleted file mode 100644
... ...
@@ -1,138 +0,0 @@
1
-/*
2
- * Copyright (c) 2007 Bobby Bingham
3
- *
4
- * This file is part of FFmpeg.
5
- *
6
- * FFmpeg is free software; you can redistribute it and/or
7
- * modify it under the terms of the GNU Lesser General Public
8
- * License as published by the Free Software Foundation; either
9
- * version 2.1 of the License, or (at your option) any later version.
10
- *
11
- * FFmpeg is distributed in the hope that it will be useful,
12
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
- * Lesser General Public License for more details.
15
- *
16
- * You should have received a copy of the GNU Lesser General Public
17
- * License along with FFmpeg; if not, write to the Free Software
18
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
- */
20
-
21
-/**
22
- * @file
23
- * audio and video splitter
24
- */
25
-
26
-#include "avfilter.h"
27
-#include "audio.h"
28
-
29
-static int split_init(AVFilterContext *ctx, const char *args, void *opaque)
30
-{
31
-    int i, nb_outputs = 2;
32
-
33
-    if (args) {
34
-        nb_outputs = strtol(args, NULL, 0);
35
-        if (nb_outputs <= 0) {
36
-            av_log(ctx, AV_LOG_ERROR, "Invalid number of outputs specified: %d.\n",
37
-                   nb_outputs);
38
-            return AVERROR(EINVAL);
39
-        }
40
-    }
41
-
42
-    for (i = 0; i < nb_outputs; i++) {
43
-        char name[32];
44
-        AVFilterPad pad = { 0 };
45
-
46
-        snprintf(name, sizeof(name), "output%d", i);
47
-        pad.type = ctx->filter->inputs[0].type;
48
-        pad.name = av_strdup(name);
49
-
50
-        avfilter_insert_outpad(ctx, i, &pad);
51
-    }
52
-
53
-    return 0;
54
-}
55
-
56
-static void split_uninit(AVFilterContext *ctx)
57
-{
58
-    int i;
59
-
60
-    for (i = 0; i < ctx->output_count; i++)
61
-        av_freep(&ctx->output_pads[i].name);
62
-}
63
-
64
-static void start_frame(AVFilterLink *inlink, AVFilterBufferRef *picref)
65
-{
66
-    AVFilterContext *ctx = inlink->dst;
67
-    int i;
68
-
69
-    for (i = 0; i < ctx->output_count; i++)
70
-        avfilter_start_frame(ctx->outputs[i],
71
-                             avfilter_ref_buffer(picref, ~AV_PERM_WRITE));
72
-}
73
-
74
-static void draw_slice(AVFilterLink *inlink, int y, int h, int slice_dir)
75
-{
76
-    AVFilterContext *ctx = inlink->dst;
77
-    int i;
78
-
79
-    for (i = 0; i < ctx->output_count; i++)
80
-        avfilter_draw_slice(ctx->outputs[i], y, h, slice_dir);
81
-}
82
-
83
-static void end_frame(AVFilterLink *inlink)
84
-{
85
-    AVFilterContext *ctx = inlink->dst;
86
-    int i;
87
-
88
-    for (i = 0; i < ctx->output_count; i++)
89
-        avfilter_end_frame(ctx->outputs[i]);
90
-
91
-    avfilter_unref_buffer(inlink->cur_buf);
92
-}
93
-
94
-AVFilter avfilter_vf_split = {
95
-    .name      = "split",
96
-    .description = NULL_IF_CONFIG_SMALL("Pass on the input to two outputs."),
97
-
98
-    .init   = split_init,
99
-    .uninit = split_uninit,
100
-
101
-    .inputs    = (const AVFilterPad[]) {{ .name      = "default",
102
-                                    .type            = AVMEDIA_TYPE_VIDEO,
103
-                                    .get_video_buffer= avfilter_null_get_video_buffer,
104
-                                    .start_frame     = start_frame,
105
-                                    .draw_slice      = draw_slice,
106
-                                    .end_frame       = end_frame, },
107
-                                  { .name = NULL}},
108
-    .outputs   = (AVFilterPad[]) {{ .name = NULL}},
109
-};
110
-
111
-static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *samplesref)
112
-{
113
-    AVFilterContext *ctx = inlink->dst;
114
-    int i;
115
-
116
-    for (i = 0; i < ctx->output_count; i++)
117
-        ff_filter_samples(inlink->dst->outputs[i],
118
-                          avfilter_ref_buffer(samplesref, ~AV_PERM_WRITE));
119
-}
120
-
121
-AVFilter avfilter_af_asplit = {
122
-    .name      = "asplit",
123
-    .description = NULL_IF_CONFIG_SMALL("Pass on the audio input to N audio outputs."),
124
-
125
-    .init   = split_init,
126
-    .uninit = split_uninit,
127
-
128
-    .inputs = (const AVFilterPad[]) {
129
-        {
130
-            .name             = "default",
131
-            .type             = AVMEDIA_TYPE_AUDIO,
132
-            .get_audio_buffer = ff_null_get_audio_buffer,
133
-            .filter_samples   = filter_samples,
134
-        },
135
-        { .name = NULL }
136
-    },
137
-    .outputs = (const AVFilterPad[]) {{ .name = NULL }},
138
-};
139 1
new file mode 100644
... ...
@@ -0,0 +1,134 @@
0
+/*
1
+ * Copyright (c) 2007 Bobby Bingham
2
+ *
3
+ * This file is part of FFmpeg.
4
+ *
5
+ * FFmpeg is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * FFmpeg is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with FFmpeg; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+/**
21
+ * @file
22
+ * audio and video splitter
23
+ */
24
+
25
+#include "avfilter.h"
26
+#include "audio.h"
27
+
28
+static int split_init(AVFilterContext *ctx, const char *args, void *opaque)
29
+{
30
+    int i, nb_outputs = 2;
31
+
32
+    if (args) {
33
+        nb_outputs = strtol(args, NULL, 0);
34
+        if (nb_outputs <= 0) {
35
+            av_log(ctx, AV_LOG_ERROR, "Invalid number of outputs specified: %d.\n",
36
+                   nb_outputs);
37
+            return AVERROR(EINVAL);
38
+        }
39
+    }
40
+
41
+    for (i = 0; i < nb_outputs; i++) {
42
+        char name[32];
43
+        AVFilterPad pad = { 0 };
44
+
45
+        snprintf(name, sizeof(name), "output%d", i);
46
+        pad.type = ctx->filter->inputs[0].type;
47
+        pad.name = av_strdup(name);
48
+
49
+        avfilter_insert_outpad(ctx, i, &pad);
50
+    }
51
+
52
+    return 0;
53
+}
54
+
55
+static void split_uninit(AVFilterContext *ctx)
56
+{
57
+    int i;
58
+
59
+    for (i = 0; i < ctx->output_count; i++)
60
+        av_freep(&ctx->output_pads[i].name);
61
+}
62
+
63
+static void start_frame(AVFilterLink *inlink, AVFilterBufferRef *picref)
64
+{
65
+    AVFilterContext *ctx = inlink->dst;
66
+    int i;
67
+
68
+    for (i = 0; i < ctx->output_count; i++)
69
+        avfilter_start_frame(ctx->outputs[i],
70
+                             avfilter_ref_buffer(picref, ~AV_PERM_WRITE));
71
+}
72
+
73
+static void draw_slice(AVFilterLink *inlink, int y, int h, int slice_dir)
74
+{
75
+    AVFilterContext *ctx = inlink->dst;
76
+    int i;
77
+
78
+    for (i = 0; i < ctx->output_count; i++)
79
+        avfilter_draw_slice(ctx->outputs[i], y, h, slice_dir);
80
+}
81
+
82
+static void end_frame(AVFilterLink *inlink)
83
+{
84
+    AVFilterContext *ctx = inlink->dst;
85
+    int i;
86
+
87
+    for (i = 0; i < ctx->output_count; i++)
88
+        avfilter_end_frame(ctx->outputs[i]);
89
+
90
+    avfilter_unref_buffer(inlink->cur_buf);
91
+}
92
+
93
+AVFilter avfilter_vf_split = {
94
+    .name      = "split",
95
+    .description = NULL_IF_CONFIG_SMALL("Pass on the input to two outputs."),
96
+
97
+    .init   = split_init,
98
+    .uninit = split_uninit,
99
+
100
+    .inputs    = (const AVFilterPad[]) {{ .name      = "default",
101
+                                    .type            = AVMEDIA_TYPE_VIDEO,
102
+                                    .get_video_buffer= avfilter_null_get_video_buffer,
103
+                                    .start_frame     = start_frame,
104
+                                    .draw_slice      = draw_slice,
105
+                                    .end_frame       = end_frame, },
106
+                                  { .name = NULL}},
107
+    .outputs   = (AVFilterPad[]) {{ .name = NULL}},
108
+};
109
+
110
+static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *samplesref)
111
+{
112
+    AVFilterContext *ctx = inlink->dst;
113
+    int i;
114
+
115
+    for (i = 0; i < ctx->output_count; i++)
116
+        ff_filter_samples(inlink->dst->outputs[i],
117
+                          avfilter_ref_buffer(samplesref, ~AV_PERM_WRITE));
118
+}
119
+
120
+AVFilter avfilter_af_asplit = {
121
+    .name        = "asplit",
122
+    .description = NULL_IF_CONFIG_SMALL("Pass on the audio input to N audio outputs."),
123
+
124
+    .init   = split_init,
125
+    .uninit = split_uninit,
126
+
127
+    .inputs  = (const AVFilterPad[]) {{ .name             = "default",
128
+                                        .type             = AVMEDIA_TYPE_AUDIO,
129
+                                        .get_audio_buffer = ff_null_get_audio_buffer,
130
+                                        .filter_samples   = filter_samples },
131
+                                      { .name = NULL }},
132
+    .outputs = (const AVFilterPad[]) {{ .name = NULL }},
133
+};
... ...
@@ -29,7 +29,7 @@
29 29
 #include "libavutil/avutil.h"
30 30
 
31 31
 #define LIBAVFILTER_VERSION_MAJOR  2
32
-#define LIBAVFILTER_VERSION_MINOR 75
32
+#define LIBAVFILTER_VERSION_MINOR 76
33 33
 #define LIBAVFILTER_VERSION_MICRO 100
34 34
 
35 35
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \