... | ... |
@@ -154,8 +154,6 @@ void ff_vector_fmul_window_neon(float *dst, const float *src0, |
154 | 154 |
const float *src1, const float *win, int len); |
155 | 155 |
void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul, |
156 | 156 |
int len); |
157 |
-void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul, |
|
158 |
- int len); |
|
159 | 157 |
void ff_butterflies_float_neon(float *v1, float *v2, int len); |
160 | 158 |
float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len); |
161 | 159 |
void ff_vector_fmul_reverse_neon(float *dst, const float *src0, |
... | ... |
@@ -329,7 +327,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) |
329 | 329 |
|
330 | 330 |
c->vector_fmul_window = ff_vector_fmul_window_neon; |
331 | 331 |
c->vector_fmul_scalar = ff_vector_fmul_scalar_neon; |
332 |
- c->vector_fmac_scalar = ff_vector_fmac_scalar_neon; |
|
333 | 332 |
c->butterflies_float = ff_butterflies_float_neon; |
334 | 333 |
c->scalarproduct_float = ff_scalarproduct_float_neon; |
335 | 334 |
c->vector_fmul_reverse = ff_vector_fmul_reverse_neon; |
... | ... |
@@ -682,54 +682,6 @@ NOVFP vdup.32 q8, r2 |
682 | 682 |
.unreq len |
683 | 683 |
endfunc |
684 | 684 |
|
685 |
-function ff_vector_fmac_scalar_neon, export=1 |
|
686 |
-VFP len .req r2 |
|
687 |
-VFP acc .req r3 |
|
688 |
-NOVFP len .req r3 |
|
689 |
-NOVFP acc .req r2 |
|
690 |
-VFP vdup.32 q15, d0[0] |
|
691 |
-NOVFP vdup.32 q15, r2 |
|
692 |
- bics r12, len, #15 |
|
693 |
- mov acc, r0 |
|
694 |
- beq 3f |
|
695 |
- vld1.32 {q0}, [r1,:128]! |
|
696 |
- vld1.32 {q8}, [acc,:128]! |
|
697 |
- vld1.32 {q1}, [r1,:128]! |
|
698 |
- vld1.32 {q9}, [acc,:128]! |
|
699 |
-1: vmla.f32 q8, q0, q15 |
|
700 |
- vld1.32 {q2}, [r1,:128]! |
|
701 |
- vld1.32 {q10}, [acc,:128]! |
|
702 |
- vmla.f32 q9, q1, q15 |
|
703 |
- vld1.32 {q3}, [r1,:128]! |
|
704 |
- vld1.32 {q11}, [acc,:128]! |
|
705 |
- vmla.f32 q10, q2, q15 |
|
706 |
- vst1.32 {q8}, [r0,:128]! |
|
707 |
- vmla.f32 q11, q3, q15 |
|
708 |
- vst1.32 {q9}, [r0,:128]! |
|
709 |
- subs r12, r12, #16 |
|
710 |
- beq 2f |
|
711 |
- vld1.32 {q0}, [r1,:128]! |
|
712 |
- vld1.32 {q8}, [acc,:128]! |
|
713 |
- vst1.32 {q10}, [r0,:128]! |
|
714 |
- vld1.32 {q1}, [r1,:128]! |
|
715 |
- vld1.32 {q9}, [acc,:128]! |
|
716 |
- vst1.32 {q11}, [r0,:128]! |
|
717 |
- b 1b |
|
718 |
-2: vst1.32 {q10}, [r0,:128]! |
|
719 |
- vst1.32 {q11}, [r0,:128]! |
|
720 |
- ands len, len, #15 |
|
721 |
- it eq |
|
722 |
- bxeq lr |
|
723 |
-3: vld1.32 {q0}, [r1,:128]! |
|
724 |
- vld1.32 {q8}, [acc,:128]! |
|
725 |
- vmla.f32 q8, q0, q15 |
|
726 |
- vst1.32 {q8}, [r0,:128]! |
|
727 |
- subs len, len, #4 |
|
728 |
- bgt 3b |
|
729 |
- bx lr |
|
730 |
- .unreq len |
|
731 |
-endfunc |
|
732 |
- |
|
733 | 685 |
function ff_butterflies_float_neon, export=1 |
734 | 686 |
1: vld1.32 {q0},[r0,:128] |
735 | 687 |
vld1.32 {q1},[r1,:128] |
... | ... |
@@ -27,6 +27,7 @@ |
27 | 27 |
#include <stdio.h> |
28 | 28 |
|
29 | 29 |
#include "libavutil/common.h" |
30 |
+#include "libavutil/float_dsp.h" |
|
30 | 31 |
#include "libavutil/intmath.h" |
31 | 32 |
#include "libavutil/intreadwrite.h" |
32 | 33 |
#include "libavutil/mathematics.h" |
... | ... |
@@ -383,7 +384,7 @@ typedef struct { |
383 | 383 |
int profile; |
384 | 384 |
|
385 | 385 |
int debug_flag; ///< used for suppressing repeated error messages output |
386 |
- DSPContext dsp; |
|
386 |
+ AVFloatDSPContext fdsp; |
|
387 | 387 |
FFTContext imdct; |
388 | 388 |
SynthFilterContext synth; |
389 | 389 |
DCADSPContext dcadsp; |
... | ... |
@@ -1865,8 +1866,8 @@ static int dca_decode_frame(AVCodecContext *avctx, void *data, |
1865 | 1865 |
float *back_chan = s->samples + s->channel_order_tab[s->xch_base_channel] * 256; |
1866 | 1866 |
float *lt_chan = s->samples + s->channel_order_tab[s->xch_base_channel - 2] * 256; |
1867 | 1867 |
float *rt_chan = s->samples + s->channel_order_tab[s->xch_base_channel - 1] * 256; |
1868 |
- s->dsp.vector_fmac_scalar(lt_chan, back_chan, -M_SQRT1_2, 256); |
|
1869 |
- s->dsp.vector_fmac_scalar(rt_chan, back_chan, -M_SQRT1_2, 256); |
|
1868 |
+ s->fdsp.vector_fmac_scalar(lt_chan, back_chan, -M_SQRT1_2, 256); |
|
1869 |
+ s->fdsp.vector_fmac_scalar(rt_chan, back_chan, -M_SQRT1_2, 256); |
|
1870 | 1870 |
} |
1871 | 1871 |
|
1872 | 1872 |
if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) { |
... | ... |
@@ -1908,7 +1909,7 @@ static av_cold int dca_decode_init(AVCodecContext *avctx) |
1908 | 1908 |
s->avctx = avctx; |
1909 | 1909 |
dca_init_vlcs(); |
1910 | 1910 |
|
1911 |
- ff_dsputil_init(&s->dsp, avctx); |
|
1911 |
+ avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); |
|
1912 | 1912 |
ff_mdct_init(&s->imdct, 6, 1, 1.0); |
1913 | 1913 |
ff_synth_filter_init(&s->synth); |
1914 | 1914 |
ff_dcadsp_init(&s->dcadsp); |
... | ... |
@@ -2401,14 +2401,6 @@ static void vector_fmul_scalar_c(float *dst, const float *src, float mul, |
2401 | 2401 |
dst[i] = src[i] * mul; |
2402 | 2402 |
} |
2403 | 2403 |
|
2404 |
-static void vector_fmac_scalar_c(float *dst, const float *src, float mul, |
|
2405 |
- int len) |
|
2406 |
-{ |
|
2407 |
- int i; |
|
2408 |
- for (i = 0; i < len; i++) |
|
2409 |
- dst[i] += src[i] * mul; |
|
2410 |
-} |
|
2411 |
- |
|
2412 | 2404 |
static void butterflies_float_c(float *restrict v1, float *restrict v2, |
2413 | 2405 |
int len) |
2414 | 2406 |
{ |
... | ... |
@@ -2904,7 +2896,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) |
2904 | 2904 |
c->butterflies_float = butterflies_float_c; |
2905 | 2905 |
c->butterflies_float_interleave = butterflies_float_interleave_c; |
2906 | 2906 |
c->vector_fmul_scalar = vector_fmul_scalar_c; |
2907 |
- c->vector_fmac_scalar = vector_fmac_scalar_c; |
|
2908 | 2907 |
|
2909 | 2908 |
c->shrink[0]= av_image_copy_plane; |
2910 | 2909 |
c->shrink[1]= ff_shrink22; |
... | ... |
@@ -417,17 +417,6 @@ typedef struct DSPContext { |
417 | 417 |
void (*vector_fmul_scalar)(float *dst, const float *src, float mul, |
418 | 418 |
int len); |
419 | 419 |
/** |
420 |
- * Multiply a vector of floats by a scalar float and add to |
|
421 |
- * destination vector. Source and destination vectors must |
|
422 |
- * overlap exactly or not at all. |
|
423 |
- * @param dst result vector, 16-byte aligned |
|
424 |
- * @param src input vector, 16-byte aligned |
|
425 |
- * @param mul scalar value |
|
426 |
- * @param len length of vector, multiple of 4 |
|
427 |
- */ |
|
428 |
- void (*vector_fmac_scalar)(float *dst, const float *src, float mul, |
|
429 |
- int len); |
|
430 |
- /** |
|
431 | 420 |
* Calculate the scalar product of two vectors of floats. |
432 | 421 |
* @param v1 first vector, 16-byte aligned |
433 | 422 |
* @param v2 second vector, 16-byte aligned |
... | ... |
@@ -26,7 +26,11 @@ |
26 | 26 |
|
27 | 27 |
void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len); |
28 | 28 |
|
29 |
+void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul, |
|
30 |
+ int len); |
|
31 |
+ |
|
29 | 32 |
void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp) |
30 | 33 |
{ |
31 | 34 |
fdsp->vector_fmul = ff_vector_fmul_neon; |
35 |
+ fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_neon; |
|
32 | 36 |
} |
... | ... |
@@ -62,3 +62,51 @@ function ff_vector_fmul_neon, export=1 |
62 | 62 |
3: vst1.32 {d16-d19},[r0,:128]! |
63 | 63 |
bx lr |
64 | 64 |
endfunc |
65 |
+ |
|
66 |
+function ff_vector_fmac_scalar_neon, export=1 |
|
67 |
+VFP len .req r2 |
|
68 |
+VFP acc .req r3 |
|
69 |
+NOVFP len .req r3 |
|
70 |
+NOVFP acc .req r2 |
|
71 |
+VFP vdup.32 q15, d0[0] |
|
72 |
+NOVFP vdup.32 q15, r2 |
|
73 |
+ bics r12, len, #15 |
|
74 |
+ mov acc, r0 |
|
75 |
+ beq 3f |
|
76 |
+ vld1.32 {q0}, [r1,:128]! |
|
77 |
+ vld1.32 {q8}, [acc,:128]! |
|
78 |
+ vld1.32 {q1}, [r1,:128]! |
|
79 |
+ vld1.32 {q9}, [acc,:128]! |
|
80 |
+1: vmla.f32 q8, q0, q15 |
|
81 |
+ vld1.32 {q2}, [r1,:128]! |
|
82 |
+ vld1.32 {q10}, [acc,:128]! |
|
83 |
+ vmla.f32 q9, q1, q15 |
|
84 |
+ vld1.32 {q3}, [r1,:128]! |
|
85 |
+ vld1.32 {q11}, [acc,:128]! |
|
86 |
+ vmla.f32 q10, q2, q15 |
|
87 |
+ vst1.32 {q8}, [r0,:128]! |
|
88 |
+ vmla.f32 q11, q3, q15 |
|
89 |
+ vst1.32 {q9}, [r0,:128]! |
|
90 |
+ subs r12, r12, #16 |
|
91 |
+ beq 2f |
|
92 |
+ vld1.32 {q0}, [r1,:128]! |
|
93 |
+ vld1.32 {q8}, [acc,:128]! |
|
94 |
+ vst1.32 {q10}, [r0,:128]! |
|
95 |
+ vld1.32 {q1}, [r1,:128]! |
|
96 |
+ vld1.32 {q9}, [acc,:128]! |
|
97 |
+ vst1.32 {q11}, [r0,:128]! |
|
98 |
+ b 1b |
|
99 |
+2: vst1.32 {q10}, [r0,:128]! |
|
100 |
+ vst1.32 {q11}, [r0,:128]! |
|
101 |
+ ands len, len, #15 |
|
102 |
+ it eq |
|
103 |
+ bxeq lr |
|
104 |
+3: vld1.32 {q0}, [r1,:128]! |
|
105 |
+ vld1.32 {q8}, [acc,:128]! |
|
106 |
+ vmla.f32 q8, q0, q15 |
|
107 |
+ vst1.32 {q8}, [r0,:128]! |
|
108 |
+ subs len, len, #4 |
|
109 |
+ bgt 3b |
|
110 |
+ bx lr |
|
111 |
+ .unreq len |
|
112 |
+endfunc |
... | ... |
@@ -28,9 +28,18 @@ static void vector_fmul_c(float *dst, const float *src0, const float *src1, |
28 | 28 |
dst[i] = src0[i] * src1[i]; |
29 | 29 |
} |
30 | 30 |
|
31 |
+static void vector_fmac_scalar_c(float *dst, const float *src, float mul, |
|
32 |
+ int len) |
|
33 |
+{ |
|
34 |
+ int i; |
|
35 |
+ for (i = 0; i < len; i++) |
|
36 |
+ dst[i] += src[i] * mul; |
|
37 |
+} |
|
38 |
+ |
|
31 | 39 |
void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact) |
32 | 40 |
{ |
33 | 41 |
fdsp->vector_fmul = vector_fmul_c; |
42 |
+ fdsp->vector_fmac_scalar = vector_fmac_scalar_c; |
|
34 | 43 |
|
35 | 44 |
#if ARCH_ARM |
36 | 45 |
ff_float_dsp_init_arm(fdsp); |
... | ... |
@@ -35,6 +35,22 @@ typedef struct AVFloatDSPContext { |
35 | 35 |
*/ |
36 | 36 |
void (*vector_fmul)(float *dst, const float *src0, const float *src1, |
37 | 37 |
int len); |
38 |
+ |
|
39 |
+ /** |
|
40 |
+ * Multiply a vector of floats by a scalar float and add to |
|
41 |
+ * destination vector. Source and destination vectors must |
|
42 |
+ * overlap exactly or not at all. |
|
43 |
+ * |
|
44 |
+ * @param dst result vector |
|
45 |
+ * constraints: 16-byte aligned |
|
46 |
+ * @param src input vector |
|
47 |
+ * constraints: 16-byte aligned |
|
48 |
+ * @param mul scalar value |
|
49 |
+ * @param len length of vector |
|
50 |
+ * constraints: multiple of 4 |
|
51 |
+ */ |
|
52 |
+ void (*vector_fmac_scalar)(float *dst, const float *src, float mul, |
|
53 |
+ int len); |
|
38 | 54 |
} AVFloatDSPContext; |
39 | 55 |
|
40 | 56 |
/** |