Browse code

float_dsp: Move vector_fmac_scalar() from libavcodec to libavutil

Justin Ruggles authored on 2012/06/09 02:49:56
Showing 9 changed files
... ...
@@ -154,8 +154,6 @@ void ff_vector_fmul_window_neon(float *dst, const float *src0,
154 154
                                 const float *src1, const float *win, int len);
155 155
 void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
156 156
                                 int len);
157
-void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
158
-                                int len);
159 157
 void ff_butterflies_float_neon(float *v1, float *v2, int len);
160 158
 float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len);
161 159
 void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
... ...
@@ -329,7 +327,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
329 329
 
330 330
     c->vector_fmul_window         = ff_vector_fmul_window_neon;
331 331
     c->vector_fmul_scalar         = ff_vector_fmul_scalar_neon;
332
-    c->vector_fmac_scalar         = ff_vector_fmac_scalar_neon;
333 332
     c->butterflies_float          = ff_butterflies_float_neon;
334 333
     c->scalarproduct_float        = ff_scalarproduct_float_neon;
335 334
     c->vector_fmul_reverse        = ff_vector_fmul_reverse_neon;
... ...
@@ -682,54 +682,6 @@ NOVFP   vdup.32         q8,  r2
682 682
         .unreq          len
683 683
 endfunc
684 684
 
685
-function ff_vector_fmac_scalar_neon, export=1
686
-VFP     len .req r2
687
-VFP     acc .req r3
688
-NOVFP   len .req r3
689
-NOVFP   acc .req r2
690
-VFP     vdup.32         q15, d0[0]
691
-NOVFP   vdup.32         q15, r2
692
-        bics            r12, len, #15
693
-        mov             acc, r0
694
-        beq             3f
695
-        vld1.32         {q0},     [r1,:128]!
696
-        vld1.32         {q8},     [acc,:128]!
697
-        vld1.32         {q1},     [r1,:128]!
698
-        vld1.32         {q9},     [acc,:128]!
699
-1:      vmla.f32        q8,  q0,  q15
700
-        vld1.32         {q2},     [r1,:128]!
701
-        vld1.32         {q10},    [acc,:128]!
702
-        vmla.f32        q9,  q1,  q15
703
-        vld1.32         {q3},     [r1,:128]!
704
-        vld1.32         {q11},    [acc,:128]!
705
-        vmla.f32        q10, q2,  q15
706
-        vst1.32         {q8},     [r0,:128]!
707
-        vmla.f32        q11, q3,  q15
708
-        vst1.32         {q9},     [r0,:128]!
709
-        subs            r12, r12, #16
710
-        beq             2f
711
-        vld1.32         {q0},     [r1,:128]!
712
-        vld1.32         {q8},     [acc,:128]!
713
-        vst1.32         {q10},    [r0,:128]!
714
-        vld1.32         {q1},     [r1,:128]!
715
-        vld1.32         {q9},     [acc,:128]!
716
-        vst1.32         {q11},    [r0,:128]!
717
-        b               1b
718
-2:      vst1.32         {q10},    [r0,:128]!
719
-        vst1.32         {q11},    [r0,:128]!
720
-        ands            len, len, #15
721
-        it              eq
722
-        bxeq            lr
723
-3:      vld1.32         {q0},     [r1,:128]!
724
-        vld1.32         {q8},     [acc,:128]!
725
-        vmla.f32        q8,  q0,  q15
726
-        vst1.32         {q8},     [r0,:128]!
727
-        subs            len, len, #4
728
-        bgt             3b
729
-        bx              lr
730
-        .unreq          len
731
-endfunc
732
-
733 685
 function ff_butterflies_float_neon, export=1
734 686
 1:      vld1.32         {q0},[r0,:128]
735 687
         vld1.32         {q1},[r1,:128]
... ...
@@ -27,6 +27,7 @@
27 27
 #include <stdio.h>
28 28
 
29 29
 #include "libavutil/common.h"
30
+#include "libavutil/float_dsp.h"
30 31
 #include "libavutil/intmath.h"
31 32
 #include "libavutil/intreadwrite.h"
32 33
 #include "libavutil/mathematics.h"
... ...
@@ -383,7 +384,7 @@ typedef struct {
383 383
     int profile;
384 384
 
385 385
     int debug_flag;             ///< used for suppressing repeated error messages output
386
-    DSPContext dsp;
386
+    AVFloatDSPContext fdsp;
387 387
     FFTContext imdct;
388 388
     SynthFilterContext synth;
389 389
     DCADSPContext dcadsp;
... ...
@@ -1865,8 +1866,8 @@ static int dca_decode_frame(AVCodecContext *avctx, void *data,
1865 1865
             float *back_chan = s->samples + s->channel_order_tab[s->xch_base_channel]     * 256;
1866 1866
             float *lt_chan   = s->samples + s->channel_order_tab[s->xch_base_channel - 2] * 256;
1867 1867
             float *rt_chan   = s->samples + s->channel_order_tab[s->xch_base_channel - 1] * 256;
1868
-            s->dsp.vector_fmac_scalar(lt_chan, back_chan, -M_SQRT1_2, 256);
1869
-            s->dsp.vector_fmac_scalar(rt_chan, back_chan, -M_SQRT1_2, 256);
1868
+            s->fdsp.vector_fmac_scalar(lt_chan, back_chan, -M_SQRT1_2, 256);
1869
+            s->fdsp.vector_fmac_scalar(rt_chan, back_chan, -M_SQRT1_2, 256);
1870 1870
         }
1871 1871
 
1872 1872
         if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) {
... ...
@@ -1908,7 +1909,7 @@ static av_cold int dca_decode_init(AVCodecContext *avctx)
1908 1908
     s->avctx = avctx;
1909 1909
     dca_init_vlcs();
1910 1910
 
1911
-    ff_dsputil_init(&s->dsp, avctx);
1911
+    avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
1912 1912
     ff_mdct_init(&s->imdct, 6, 1, 1.0);
1913 1913
     ff_synth_filter_init(&s->synth);
1914 1914
     ff_dcadsp_init(&s->dcadsp);
... ...
@@ -2401,14 +2401,6 @@ static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
2401 2401
         dst[i] = src[i] * mul;
2402 2402
 }
2403 2403
 
2404
-static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
2405
-                                 int len)
2406
-{
2407
-    int i;
2408
-    for (i = 0; i < len; i++)
2409
-        dst[i] += src[i] * mul;
2410
-}
2411
-
2412 2404
 static void butterflies_float_c(float *restrict v1, float *restrict v2,
2413 2405
                                 int len)
2414 2406
 {
... ...
@@ -2904,7 +2896,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
2904 2904
     c->butterflies_float = butterflies_float_c;
2905 2905
     c->butterflies_float_interleave = butterflies_float_interleave_c;
2906 2906
     c->vector_fmul_scalar = vector_fmul_scalar_c;
2907
-    c->vector_fmac_scalar = vector_fmac_scalar_c;
2908 2907
 
2909 2908
     c->shrink[0]= av_image_copy_plane;
2910 2909
     c->shrink[1]= ff_shrink22;
... ...
@@ -417,17 +417,6 @@ typedef struct DSPContext {
417 417
     void (*vector_fmul_scalar)(float *dst, const float *src, float mul,
418 418
                                int len);
419 419
     /**
420
-     * Multiply a vector of floats by a scalar float and add to
421
-     * destination vector.  Source and destination vectors must
422
-     * overlap exactly or not at all.
423
-     * @param dst result vector, 16-byte aligned
424
-     * @param src input vector, 16-byte aligned
425
-     * @param mul scalar value
426
-     * @param len length of vector, multiple of 4
427
-     */
428
-    void (*vector_fmac_scalar)(float *dst, const float *src, float mul,
429
-                               int len);
430
-    /**
431 420
      * Calculate the scalar product of two vectors of floats.
432 421
      * @param v1  first vector, 16-byte aligned
433 422
      * @param v2  second vector, 16-byte aligned
... ...
@@ -26,7 +26,11 @@
26 26
 
27 27
 void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len);
28 28
 
29
+void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
30
+                                int len);
31
+
29 32
 void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
30 33
 {
31 34
     fdsp->vector_fmul = ff_vector_fmul_neon;
35
+    fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_neon;
32 36
 }
... ...
@@ -62,3 +62,51 @@ function ff_vector_fmul_neon, export=1
62 62
 3:      vst1.32         {d16-d19},[r0,:128]!
63 63
         bx              lr
64 64
 endfunc
65
+
66
+function ff_vector_fmac_scalar_neon, export=1
67
+VFP     len .req r2
68
+VFP     acc .req r3
69
+NOVFP   len .req r3
70
+NOVFP   acc .req r2
71
+VFP     vdup.32         q15, d0[0]
72
+NOVFP   vdup.32         q15, r2
73
+        bics            r12, len, #15
74
+        mov             acc, r0
75
+        beq             3f
76
+        vld1.32         {q0},     [r1,:128]!
77
+        vld1.32         {q8},     [acc,:128]!
78
+        vld1.32         {q1},     [r1,:128]!
79
+        vld1.32         {q9},     [acc,:128]!
80
+1:      vmla.f32        q8,  q0,  q15
81
+        vld1.32         {q2},     [r1,:128]!
82
+        vld1.32         {q10},    [acc,:128]!
83
+        vmla.f32        q9,  q1,  q15
84
+        vld1.32         {q3},     [r1,:128]!
85
+        vld1.32         {q11},    [acc,:128]!
86
+        vmla.f32        q10, q2,  q15
87
+        vst1.32         {q8},     [r0,:128]!
88
+        vmla.f32        q11, q3,  q15
89
+        vst1.32         {q9},     [r0,:128]!
90
+        subs            r12, r12, #16
91
+        beq             2f
92
+        vld1.32         {q0},     [r1,:128]!
93
+        vld1.32         {q8},     [acc,:128]!
94
+        vst1.32         {q10},    [r0,:128]!
95
+        vld1.32         {q1},     [r1,:128]!
96
+        vld1.32         {q9},     [acc,:128]!
97
+        vst1.32         {q11},    [r0,:128]!
98
+        b               1b
99
+2:      vst1.32         {q10},    [r0,:128]!
100
+        vst1.32         {q11},    [r0,:128]!
101
+        ands            len, len, #15
102
+        it              eq
103
+        bxeq            lr
104
+3:      vld1.32         {q0},     [r1,:128]!
105
+        vld1.32         {q8},     [acc,:128]!
106
+        vmla.f32        q8,  q0,  q15
107
+        vst1.32         {q8},     [r0,:128]!
108
+        subs            len, len, #4
109
+        bgt             3b
110
+        bx              lr
111
+        .unreq          len
112
+endfunc
... ...
@@ -28,9 +28,18 @@ static void vector_fmul_c(float *dst, const float *src0, const float *src1,
28 28
         dst[i] = src0[i] * src1[i];
29 29
 }
30 30
 
31
+static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
32
+                                 int len)
33
+{
34
+    int i;
35
+    for (i = 0; i < len; i++)
36
+        dst[i] += src[i] * mul;
37
+}
38
+
31 39
 void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
32 40
 {
33 41
     fdsp->vector_fmul = vector_fmul_c;
42
+    fdsp->vector_fmac_scalar = vector_fmac_scalar_c;
34 43
 
35 44
 #if ARCH_ARM
36 45
     ff_float_dsp_init_arm(fdsp);
... ...
@@ -35,6 +35,22 @@ typedef struct AVFloatDSPContext {
35 35
      */
36 36
     void (*vector_fmul)(float *dst, const float *src0, const float *src1,
37 37
                         int len);
38
+
39
+    /**
40
+     * Multiply a vector of floats by a scalar float and add to
41
+     * destination vector.  Source and destination vectors must
42
+     * overlap exactly or not at all.
43
+     *
44
+     * @param dst result vector
45
+     *            constraints: 16-byte aligned
46
+     * @param src input vector
47
+     *            constraints: 16-byte aligned
48
+     * @param mul scalar value
49
+     * @param len length of vector
50
+     *            constraints: multiple of 4
51
+     */
52
+    void (*vector_fmac_scalar)(float *dst, const float *src, float mul,
53
+                               int len);
38 54
 } AVFloatDSPContext;
39 55
 
40 56
 /**