Browse code

Merge remote-tracking branch 'qatar/master'

* qatar/master:
float_dsp: ppc: add a separate header for Altivec function prototypes
ARM: fix float_dsp breakage from d5a7229
Add a float DSP framework to libavutil
PPC: Move types_altivec.h and util_altivec.h from libavcodec to libavutil
ARM: Move asm.S from libavcodec to libavutil
vc1dsp: mark put/avg_vc1_mspel_mc() always_inline

Merged-by: Michael Niedermayer <michaelni@gmx.at>

Michael Niedermayer authored on 2012/06/09 06:02:54
Showing 95 changed files
... ...
@@ -30,6 +30,7 @@
30 30
 #ifndef AVCODEC_AAC_H
31 31
 #define AVCODEC_AAC_H
32 32
 
33
+#include "libavutil/float_dsp.h"
33 34
 #include "avcodec.h"
34 35
 #include "dsputil.h"
35 36
 #include "fft.h"
... ...
@@ -292,6 +293,7 @@ typedef struct {
292 292
     FFTContext mdct_ltp;
293 293
     DSPContext dsp;
294 294
     FmtConvertContext fmt_conv;
295
+    AVFloatDSPContext fdsp;
295 296
     int random_state;
296 297
     /** @} */
297 298
 
... ...
@@ -79,7 +79,7 @@
79 79
            Parametric Stereo.
80 80
  */
81 81
 
82
-
82
+#include "libavutil/float_dsp.h"
83 83
 #include "avcodec.h"
84 84
 #include "internal.h"
85 85
 #include "get_bits.h"
... ...
@@ -901,6 +901,7 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
901 901
 
902 902
     ff_dsputil_init(&ac->dsp, avctx);
903 903
     ff_fmt_convert_init(&ac->fmt_conv, avctx);
904
+    avpriv_float_dsp_init(&ac->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
904 905
 
905 906
     ac->random_state = 0x1f2e3d4c;
906 907
 
... ...
@@ -2069,10 +2070,10 @@ static void windowing_and_mdct_ltp(AACContext *ac, float *out,
2069 2069
     const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
2070 2070
 
2071 2071
     if (ics->window_sequence[0] != LONG_STOP_SEQUENCE) {
2072
-        ac->dsp.vector_fmul(in, in, lwindow_prev, 1024);
2072
+        ac->fdsp.vector_fmul(in, in, lwindow_prev, 1024);
2073 2073
     } else {
2074 2074
         memset(in, 0, 448 * sizeof(float));
2075
-        ac->dsp.vector_fmul(in + 448, in + 448, swindow_prev, 128);
2075
+        ac->fdsp.vector_fmul(in + 448, in + 448, swindow_prev, 128);
2076 2076
     }
2077 2077
     if (ics->window_sequence[0] != LONG_START_SEQUENCE) {
2078 2078
         ac->dsp.vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024);
... ...
@@ -30,6 +30,7 @@
30 30
  * add temporal noise shaping
31 31
  ***********************************/
32 32
 
33
+#include "libavutil/float_dsp.h"
33 34
 #include "libavutil/opt.h"
34 35
 #include "avcodec.h"
35 36
 #include "put_bits.h"
... ...
@@ -182,7 +183,9 @@ static void put_audio_specific_config(AVCodecContext *avctx)
182 182
 }
183 183
 
184 184
 #define WINDOW_FUNC(type) \
185
-static void apply_ ##type ##_window(DSPContext *dsp, SingleChannelElement *sce, const float *audio)
185
+static void apply_ ##type ##_window(DSPContext *dsp, AVFloatDSPContext *fdsp, \
186
+                                    SingleChannelElement *sce, \
187
+                                    const float *audio)
186 188
 
187 189
 WINDOW_FUNC(only_long)
188 190
 {
... ...
@@ -190,7 +193,7 @@ WINDOW_FUNC(only_long)
190 190
     const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
191 191
     float *out = sce->ret;
192 192
 
193
-    dsp->vector_fmul        (out,        audio,        lwindow, 1024);
193
+    fdsp->vector_fmul       (out,        audio,        lwindow, 1024);
194 194
     dsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024);
195 195
 }
196 196
 
... ...
@@ -200,7 +203,7 @@ WINDOW_FUNC(long_start)
200 200
     const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
201 201
     float *out = sce->ret;
202 202
 
203
-    dsp->vector_fmul(out, audio, lwindow, 1024);
203
+    fdsp->vector_fmul(out, audio, lwindow, 1024);
204 204
     memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
205 205
     dsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
206 206
     memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
... ...
@@ -213,7 +216,7 @@ WINDOW_FUNC(long_stop)
213 213
     float *out = sce->ret;
214 214
 
215 215
     memset(out, 0, sizeof(out[0]) * 448);
216
-    dsp->vector_fmul(out + 448, audio + 448, swindow, 128);
216
+    fdsp->vector_fmul(out + 448, audio + 448, swindow, 128);
217 217
     memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
218 218
     dsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
219 219
 }
... ...
@@ -227,7 +230,7 @@ WINDOW_FUNC(eight_short)
227 227
     int w;
228 228
 
229 229
     for (w = 0; w < 8; w++) {
230
-        dsp->vector_fmul        (out, in, w ? pwindow : swindow, 128);
230
+        fdsp->vector_fmul       (out, in, w ? pwindow : swindow, 128);
231 231
         out += 128;
232 232
         in  += 128;
233 233
         dsp->vector_fmul_reverse(out, in, swindow, 128);
... ...
@@ -235,7 +238,9 @@ WINDOW_FUNC(eight_short)
235 235
     }
236 236
 }
237 237
 
238
-static void (*const apply_window[4])(DSPContext *dsp, SingleChannelElement *sce, const float *audio) = {
238
+static void (*const apply_window[4])(DSPContext *dsp, AVFloatDSPContext *fdsp,
239
+                                     SingleChannelElement *sce,
240
+                                     const float *audio) = {
239 241
     [ONLY_LONG_SEQUENCE]   = apply_only_long_window,
240 242
     [LONG_START_SEQUENCE]  = apply_long_start_window,
241 243
     [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
... ...
@@ -248,7 +253,7 @@ static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
248 248
     int i;
249 249
     float *output = sce->ret;
250 250
 
251
-    apply_window[sce->ics.window_sequence[0]](&s->dsp, sce, audio);
251
+    apply_window[sce->ics.window_sequence[0]](&s->dsp, &s->fdsp, sce, audio);
252 252
 
253 253
     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE)
254 254
         s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output);
... ...
@@ -693,6 +698,7 @@ static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
693 693
     int ret = 0;
694 694
 
695 695
     ff_dsputil_init(&s->dsp, avctx);
696
+    avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
696 697
 
697 698
     // window init
698 699
     ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
... ...
@@ -22,6 +22,7 @@
22 22
 #ifndef AVCODEC_AACENC_H
23 23
 #define AVCODEC_AACENC_H
24 24
 
25
+#include "libavutil/float_dsp.h"
25 26
 #include "avcodec.h"
26 27
 #include "put_bits.h"
27 28
 #include "dsputil.h"
... ...
@@ -61,6 +62,7 @@ typedef struct AACEncContext {
61 61
     FFTContext mdct1024;                         ///< long (1024 samples) frame transform context
62 62
     FFTContext mdct128;                          ///< short (128 samples) frame transform context
63 63
     DSPContext  dsp;
64
+    AVFloatDSPContext fdsp;
64 65
     float *planar_samples[6];                    ///< saved preprocessed input
65 66
 
66 67
     int samplerate_index;                        ///< MPEG-4 samplerate index
... ...
@@ -2491,6 +2491,7 @@ av_cold int ff_ac3_encode_init(AVCodecContext *avctx)
2491 2491
 #endif
2492 2492
 
2493 2493
     ff_dsputil_init(&s->dsp, avctx);
2494
+    avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
2494 2495
     ff_ac3dsp_init(&s->ac3dsp, avctx->flags & CODEC_FLAG_BITEXACT);
2495 2496
 
2496 2497
     dprint_options(s);
... ...
@@ -29,6 +29,8 @@
29 29
 #define AVCODEC_AC3ENC_H
30 30
 
31 31
 #include <stdint.h>
32
+
33
+#include "libavutil/float_dsp.h"
32 34
 #include "ac3.h"
33 35
 #include "ac3dsp.h"
34 36
 #include "avcodec.h"
... ...
@@ -158,6 +160,7 @@ typedef struct AC3EncodeContext {
158 158
     AVCodecContext *avctx;                  ///< parent AVCodecContext
159 159
     PutBitContext pb;                       ///< bitstream writer context
160 160
     DSPContext dsp;
161
+    AVFloatDSPContext fdsp;
161 162
     AC3DSPContext ac3dsp;                   ///< AC-3 optimized functions
162 163
     FFTContext mdct;                        ///< FFT context for MDCT calculation
163 164
     const SampleType *mdct_window;          ///< MDCT window function array
... ...
@@ -68,10 +68,11 @@ av_cold int AC3_NAME(mdct_init)(AC3EncodeContext *s)
68 68
 /*
69 69
  * Apply KBD window to input samples prior to MDCT.
70 70
  */
71
-static void apply_window(DSPContext *dsp, int16_t *output, const int16_t *input,
71
+static void apply_window(void *dsp, int16_t *output, const int16_t *input,
72 72
                          const int16_t *window, unsigned int len)
73 73
 {
74
-    dsp->apply_window_int16(output, input, window, len);
74
+    DSPContext *dsp0 = dsp;
75
+    dsp0->apply_window_int16(output, input, window, len);
75 76
 }
76 77
 
77 78
 
... ...
@@ -86,10 +86,12 @@ av_cold int ff_ac3_float_mdct_init(AC3EncodeContext *s)
86 86
 /*
87 87
  * Apply KBD window to input samples prior to MDCT.
88 88
  */
89
-static void apply_window(DSPContext *dsp, float *output, const float *input,
90
-                         const float *window, unsigned int len)
89
+static void apply_window(void *dsp, float *output,
90
+                         const float *input, const float *window,
91
+                         unsigned int len)
91 92
 {
92
-    dsp->vector_fmul(output, input, window, len);
93
+    AVFloatDSPContext *fdsp = dsp;
94
+    fdsp->vector_fmul(output, input, window, len);
93 95
 }
94 96
 
95 97
 
... ...
@@ -33,7 +33,7 @@
33 33
 
34 34
 static void scale_coefficients(AC3EncodeContext *s);
35 35
 
36
-static void apply_window(DSPContext *dsp, SampleType *output,
36
+static void apply_window(void *dsp, SampleType *output,
37 37
                          const SampleType *input, const SampleType *window,
38 38
                          unsigned int len);
39 39
 
... ...
@@ -110,8 +110,13 @@ static void apply_mdct(AC3EncodeContext *s)
110 110
             AC3Block *block = &s->blocks[blk];
111 111
             const SampleType *input_samples = &s->planar_samples[ch][blk * AC3_BLOCK_SIZE];
112 112
 
113
+#if CONFIG_AC3ENC_FLOAT
114
+            apply_window(&s->fdsp, s->windowed_samples, input_samples,
115
+                         s->mdct_window, AC3_WINDOW_SIZE);
116
+#else
113 117
             apply_window(&s->dsp, s->windowed_samples, input_samples,
114 118
                          s->mdct_window, AC3_WINDOW_SIZE);
119
+#endif
115 120
 
116 121
             if (s->fixed_point)
117 122
                 block->coeff_shift[ch+1] = normalize_samples(s);
... ...
@@ -18,7 +18,7 @@
18 18
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 19
  */
20 20
 
21
-#include "asm.S"
21
+#include "libavutil/arm/asm.S"
22 22
 
23 23
 function ff_ps_add_squares_neon, export=1
24 24
         mov             r3,  r0
... ...
@@ -18,7 +18,7 @@
18 18
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 19
  */
20 20
 
21
-#include "asm.S"
21
+#include "libavutil/arm/asm.S"
22 22
 
23 23
 function ff_ac3_update_bap_counts_arm, export=1
24 24
         push            {lr}
... ...
@@ -18,7 +18,7 @@
18 18
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 19
  */
20 20
 
21
-#include "asm.S"
21
+#include "libavutil/arm/asm.S"
22 22
 
23 23
 function ff_ac3_bit_alloc_calc_bap_armv6, export=1
24 24
         ldr             r12, [sp]
... ...
@@ -18,7 +18,7 @@
18 18
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 19
  */
20 20
 
21
-#include "asm.S"
21
+#include "libavutil/arm/asm.S"
22 22
 
23 23
 function ff_ac3_max_msb_abs_int16_neon, export=1
24 24
         vmov.i16        q0,  #0
25 25
deleted file mode 100644
... ...
@@ -1,231 +0,0 @@
1
-/*
2
- * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
3
- *
4
- * This file is part of FFmpeg.
5
- *
6
- * FFmpeg is free software; you can redistribute it and/or
7
- * modify it under the terms of the GNU Lesser General Public
8
- * License as published by the Free Software Foundation; either
9
- * version 2.1 of the License, or (at your option) any later version.
10
- *
11
- * FFmpeg is distributed in the hope that it will be useful,
12
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
- * Lesser General Public License for more details.
15
- *
16
- * You should have received a copy of the GNU Lesser General Public
17
- * License along with FFmpeg; if not, write to the Free Software
18
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
- */
20
-
21
-#include "config.h"
22
-
23
-#ifdef __ELF__
24
-#   define ELF
25
-#else
26
-#   define ELF @
27
-#endif
28
-
29
-#if CONFIG_THUMB
30
-#   define A @
31
-#   define T
32
-#else
33
-#   define A
34
-#   define T @
35
-#endif
36
-
37
-#if   HAVE_NEON
38
-        .arch           armv7-a
39
-#elif HAVE_ARMV6T2
40
-        .arch           armv6t2
41
-#elif HAVE_ARMV6
42
-        .arch           armv6
43
-#elif HAVE_ARMV5TE
44
-        .arch           armv5te
45
-#endif
46
-
47
-#if   HAVE_NEON
48
-        .fpu            neon
49
-#elif HAVE_ARMVFP
50
-        .fpu            vfp
51
-#endif
52
-
53
-        .syntax unified
54
-T       .thumb
55
-
56
-.macro  require8 val=1
57
-ELF     .eabi_attribute 24, \val
58
-.endm
59
-
60
-.macro  preserve8 val=1
61
-ELF     .eabi_attribute 25, \val
62
-.endm
63
-
64
-.macro  function name, export=0
65
-    .macro endfunc
66
-ELF     .size   \name, . - \name
67
-        .endfunc
68
-        .purgem endfunc
69
-    .endm
70
-        .text
71
-        .align          2
72
-    .if \export
73
-        .global EXTERN_ASM\name
74
-EXTERN_ASM\name:
75
-    .endif
76
-ELF     .type   \name, %function
77
-        .func   \name
78
-\name:
79
-.endm
80
-
81
-.macro  const   name, align=2
82
-    .macro endconst
83
-ELF     .size   \name, . - \name
84
-        .purgem endconst
85
-    .endm
86
-        .section        .rodata
87
-        .align          \align
88
-\name:
89
-.endm
90
-
91
-#if !HAVE_ARMV6T2
92
-.macro  movw    rd, val
93
-        mov     \rd, \val &  255
94
-        orr     \rd, \val & ~255
95
-.endm
96
-#endif
97
-
98
-.macro  mov32   rd, val
99
-#if HAVE_ARMV6T2
100
-        movw            \rd, #(\val) & 0xffff
101
-    .if (\val) >> 16
102
-        movt            \rd, #(\val) >> 16
103
-    .endif
104
-#else
105
-        ldr             \rd, =\val
106
-#endif
107
-.endm
108
-
109
-.macro  movrel rd, val
110
-#if HAVE_ARMV6T2 && !CONFIG_PIC && !defined(__APPLE__)
111
-        movw            \rd, #:lower16:\val
112
-        movt            \rd, #:upper16:\val
113
-#else
114
-        ldr             \rd, =\val
115
-#endif
116
-.endm
117
-
118
-.macro  ldr_pre         rt,  rn,  rm:vararg
119
-A       ldr             \rt, [\rn, \rm]!
120
-T       add             \rn, \rn, \rm
121
-T       ldr             \rt, [\rn]
122
-.endm
123
-
124
-.macro  ldr_dpre        rt,  rn,  rm:vararg
125
-A       ldr             \rt, [\rn, -\rm]!
126
-T       sub             \rn, \rn, \rm
127
-T       ldr             \rt, [\rn]
128
-.endm
129
-
130
-.macro  ldr_nreg        rt,  rn,  rm:vararg
131
-A       ldr             \rt, [\rn, -\rm]
132
-T       sub             \rt, \rn, \rm
133
-T       ldr             \rt, [\rt]
134
-.endm
135
-
136
-.macro  ldr_post        rt,  rn,  rm:vararg
137
-A       ldr             \rt, [\rn], \rm
138
-T       ldr             \rt, [\rn]
139
-T       add             \rn, \rn, \rm
140
-.endm
141
-
142
-.macro  ldrd_reg        rt,  rt2, rn,  rm
143
-A       ldrd            \rt, \rt2, [\rn, \rm]
144
-T       add             \rt, \rn, \rm
145
-T       ldrd            \rt, \rt2, [\rt]
146
-.endm
147
-
148
-.macro  ldrd_post       rt,  rt2, rn,  rm
149
-A       ldrd            \rt, \rt2, [\rn], \rm
150
-T       ldrd            \rt, \rt2, [\rn]
151
-T       add             \rn, \rn, \rm
152
-.endm
153
-
154
-.macro  ldrh_pre        rt,  rn,  rm
155
-A       ldrh            \rt, [\rn, \rm]!
156
-T       add             \rn, \rn, \rm
157
-T       ldrh            \rt, [\rn]
158
-.endm
159
-
160
-.macro  ldrh_dpre       rt,  rn,  rm
161
-A       ldrh            \rt, [\rn, -\rm]!
162
-T       sub             \rn, \rn, \rm
163
-T       ldrh            \rt, [\rn]
164
-.endm
165
-
166
-.macro  ldrh_post       rt,  rn,  rm
167
-A       ldrh            \rt, [\rn], \rm
168
-T       ldrh            \rt, [\rn]
169
-T       add             \rn, \rn, \rm
170
-.endm
171
-
172
-.macro  ldrb_post       rt,  rn,  rm
173
-A       ldrb            \rt, [\rn], \rm
174
-T       ldrb            \rt, [\rn]
175
-T       add             \rn, \rn, \rm
176
-.endm
177
-
178
-.macro  str_post       rt,  rn,  rm:vararg
179
-A       str             \rt, [\rn], \rm
180
-T       str             \rt, [\rn]
181
-T       add             \rn, \rn, \rm
182
-.endm
183
-
184
-.macro  strb_post       rt,  rn,  rm:vararg
185
-A       strb            \rt, [\rn], \rm
186
-T       strb            \rt, [\rn]
187
-T       add             \rn, \rn, \rm
188
-.endm
189
-
190
-.macro  strd_post       rt,  rt2, rn,  rm
191
-A       strd            \rt, \rt2, [\rn], \rm
192
-T       strd            \rt, \rt2, [\rn]
193
-T       add             \rn, \rn, \rm
194
-.endm
195
-
196
-.macro  strh_pre        rt,  rn,  rm
197
-A       strh            \rt, [\rn, \rm]!
198
-T       add             \rn, \rn, \rm
199
-T       strh            \rt, [\rn]
200
-.endm
201
-
202
-.macro  strh_dpre       rt,  rn,  rm
203
-A       strh            \rt, [\rn, -\rm]!
204
-T       sub             \rn, \rn, \rm
205
-T       strh            \rt, [\rn]
206
-.endm
207
-
208
-.macro  strh_post       rt,  rn,  rm
209
-A       strh            \rt, [\rn], \rm
210
-T       strh            \rt, [\rn]
211
-T       add             \rn, \rn, \rm
212
-.endm
213
-
214
-.macro  strh_dpost       rt,  rn,  rm
215
-A       strh            \rt, [\rn], -\rm
216
-T       strh            \rt, [\rn]
217
-T       sub             \rn, \rn, \rm
218
-.endm
219
-
220
-#if HAVE_VFP_ARGS
221
-        .eabi_attribute 28, 1
222
-#   define VFP
223
-#   define NOVFP @
224
-#else
225
-#   define VFP   @
226
-#   define NOVFP
227
-#endif
228
-
229
-#define GLUE(a, b) a ## b
230
-#define JOIN(a, b) GLUE(a, b)
231
-#define X(s) JOIN(EXTERN_ASM, s)
... ...
@@ -18,7 +18,7 @@
18 18
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 19
  */
20 20
 
21
-#include "asm.S"
21
+#include "libavutil/arm/asm.S"
22 22
 
23 23
 function ff_dca_lfe_fir_neon, export=1
24 24
         push            {r4-r6,lr}
... ...
@@ -20,7 +20,7 @@
20 20
 @
21 21
 
22 22
 #include "config.h"
23
-#include "asm.S"
23
+#include "libavutil/arm/asm.S"
24 24
 
25 25
         preserve8
26 26
 
... ...
@@ -18,7 +18,7 @@
18 18
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 19
  */
20 20
 
21
-#include "asm.S"
21
+#include "libavutil/arm/asm.S"
22 22
 
23 23
         preserve8
24 24
 
... ...
@@ -150,7 +150,6 @@ void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
150 150
 void ff_vp3_v_loop_filter_neon(uint8_t *, int, int *);
151 151
 void ff_vp3_h_loop_filter_neon(uint8_t *, int, int *);
152 152
 
153
-void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len);
154 153
 void ff_vector_fmul_window_neon(float *dst, const float *src0,
155 154
                                 const float *src1, const float *win, int len);
156 155
 void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
... ...
@@ -328,7 +327,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
328 328
         c->vp3_idct_dc_add   = ff_vp3_idct_dc_add_neon;
329 329
     }
330 330
 
331
-    c->vector_fmul                = ff_vector_fmul_neon;
332 331
     c->vector_fmul_window         = ff_vector_fmul_window_neon;
333 332
     c->vector_fmul_scalar         = ff_vector_fmul_scalar_neon;
334 333
     c->vector_fmac_scalar         = ff_vector_fmac_scalar_neon;
... ...
@@ -18,20 +18,13 @@
18 18
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 19
  */
20 20
 
21
-#include "libavutil/arm/cpu.h"
22 21
 #include "libavcodec/dsputil.h"
23 22
 #include "dsputil_arm.h"
24 23
 
25
-void ff_vector_fmul_vfp(float *dst, const float *src0,
26
-                        const float *src1, int len);
27 24
 void ff_vector_fmul_reverse_vfp(float *dst, const float *src0,
28 25
                                 const float *src1, int len);
29 26
 
30 27
 void ff_dsputil_init_vfp(DSPContext* c, AVCodecContext *avctx)
31 28
 {
32
-    int cpu_flags = av_get_cpu_flags();
33
-
34
-    if (!have_vfpv3(cpu_flags))
35
-        c->vector_fmul = ff_vector_fmul_vfp;
36 29
     c->vector_fmul_reverse = ff_vector_fmul_reverse_vfp;
37 30
 }
... ...
@@ -20,7 +20,7 @@
20 20
  */
21 21
 
22 22
 #include "config.h"
23
-#include "asm.S"
23
+#include "libavutil/arm/asm.S"
24 24
 
25 25
         preserve8
26 26
 
... ...
@@ -534,45 +534,6 @@ function ff_add_pixels_clamped_neon, export=1
534 534
         bx              lr
535 535
 endfunc
536 536
 
537
-function ff_vector_fmul_neon, export=1
538
-        subs            r3,  r3,  #8
539
-        vld1.32         {d0-d3},  [r1,:128]!
540
-        vld1.32         {d4-d7},  [r2,:128]!
541
-        vmul.f32        q8,  q0,  q2
542
-        vmul.f32        q9,  q1,  q3
543
-        beq             3f
544
-        bics            ip,  r3,  #15
545
-        beq             2f
546
-1:      subs            ip,  ip,  #16
547
-        vld1.32         {d0-d1},  [r1,:128]!
548
-        vld1.32         {d4-d5},  [r2,:128]!
549
-        vmul.f32        q10, q0,  q2
550
-        vld1.32         {d2-d3},  [r1,:128]!
551
-        vld1.32         {d6-d7},  [r2,:128]!
552
-        vmul.f32        q11, q1,  q3
553
-        vst1.32         {d16-d19},[r0,:128]!
554
-        vld1.32         {d0-d1},  [r1,:128]!
555
-        vld1.32         {d4-d5},  [r2,:128]!
556
-        vmul.f32        q8,  q0,  q2
557
-        vld1.32         {d2-d3},  [r1,:128]!
558
-        vld1.32         {d6-d7},  [r2,:128]!
559
-        vmul.f32        q9,  q1,  q3
560
-        vst1.32         {d20-d23},[r0,:128]!
561
-        bne             1b
562
-        ands            r3,  r3,  #15
563
-        beq             3f
564
-2:      vld1.32         {d0-d1},  [r1,:128]!
565
-        vld1.32         {d4-d5},  [r2,:128]!
566
-        vst1.32         {d16-d17},[r0,:128]!
567
-        vmul.f32        q8,  q0,  q2
568
-        vld1.32         {d2-d3},  [r1,:128]!
569
-        vld1.32         {d6-d7},  [r2,:128]!
570
-        vst1.32         {d18-d19},[r0,:128]!
571
-        vmul.f32        q9,  q1,  q3
572
-3:      vst1.32         {d16-d19},[r0,:128]!
573
-        bx              lr
574
-endfunc
575
-
576 537
 function ff_vector_fmul_window_neon, export=1
577 538
         push            {r4,r5,lr}
578 539
         ldr             lr,  [sp, #12]
... ...
@@ -19,7 +19,7 @@
19 19
  */
20 20
 
21 21
 #include "config.h"
22
-#include "asm.S"
22
+#include "libavutil/arm/asm.S"
23 23
 
24 24
 /*
25 25
  * VFP is a floating point coprocessor used in some ARM cores. VFP11 has 1 cycle
... ...
@@ -37,53 +37,6 @@
37 37
  */
38 38
 
39 39
 /**
40
- * ARM VFP optimized implementation of 'vector_fmul_c' function.
41
- * Assume that len is a positive number and is multiple of 8
42
- */
43
-@ void ff_vector_fmul_vfp(float *dst, const float *src0, const float *src1, int len)
44
-function ff_vector_fmul_vfp, export=1
45
-        vpush           {d8-d15}
46
-        fmrx            r12, fpscr
47
-        orr             r12, r12, #(3 << 16) /* set vector size to 4 */
48
-        fmxr            fpscr, r12
49
-
50
-        vldmia          r1!, {s0-s3}
51
-        vldmia          r2!, {s8-s11}
52
-        vldmia          r1!, {s4-s7}
53
-        vldmia          r2!, {s12-s15}
54
-        vmul.f32        s8,  s0,  s8
55
-1:
56
-        subs            r3,  r3,  #16
57
-        vmul.f32        s12, s4,  s12
58
-        itttt           ge
59
-        vldmiage        r1!, {s16-s19}
60
-        vldmiage        r2!, {s24-s27}
61
-        vldmiage        r1!, {s20-s23}
62
-        vldmiage        r2!, {s28-s31}
63
-        it              ge
64
-        vmulge.f32      s24, s16, s24
65
-        vstmia          r0!, {s8-s11}
66
-        vstmia          r0!, {s12-s15}
67
-        it              ge
68
-        vmulge.f32      s28, s20, s28
69
-        itttt           gt
70
-        vldmiagt        r1!, {s0-s3}
71
-        vldmiagt        r2!, {s8-s11}
72
-        vldmiagt        r1!, {s4-s7}
73
-        vldmiagt        r2!, {s12-s15}
74
-        ittt            ge
75
-        vmulge.f32      s8,  s0,  s8
76
-        vstmiage        r0!, {s24-s27}
77
-        vstmiage        r0!, {s28-s31}
78
-        bgt             1b
79
-
80
-        bic             r12, r12, #(7 << 16) /* set vector size back to 1 */
81
-        fmxr            fpscr, r12
82
-        vpop            {d8-d15}
83
-        bx              lr
84
-endfunc
85
-
86
-/**
87 40
  * ARM VFP optimized implementation of 'vector_fmul_reverse_c' function.
88 41
  * Assume that len is a positive number and is multiple of 8
89 42
  */
... ...
@@ -18,7 +18,7 @@
18 18
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 19
  */
20 20
 
21
-#include "asm.S"
21
+#include "libavutil/arm/asm.S"
22 22
 
23 23
 .macro  bflies          d0,  d1,  r0,  r1
24 24
         vrev64.32       \r0, \d1                @ t5, t6, t1, t2
... ...
@@ -24,7 +24,7 @@
24 24
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 25
  */
26 26
 
27
-#include "asm.S"
27
+#include "libavutil/arm/asm.S"
28 28
 
29 29
 #define M_SQRT1_2 0.70710678118654752440
30 30
 
... ...
@@ -20,7 +20,7 @@
20 20
  */
21 21
 
22 22
 #include "config.h"
23
-#include "asm.S"
23
+#include "libavutil/arm/asm.S"
24 24
 
25 25
         preserve8
26 26
 
... ...
@@ -19,7 +19,7 @@
19 19
  */
20 20
 
21 21
 #include "config.h"
22
-#include "asm.S"
22
+#include "libavutil/arm/asm.S"
23 23
 
24 24
 /**
25 25
  * ARM VFP optimized float to int16 conversion.
... ...
@@ -18,7 +18,7 @@
18 18
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 19
  */
20 20
 
21
-#include "asm.S"
21
+#include "libavutil/arm/asm.S"
22 22
 
23 23
 /* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
24 24
 .macro  h264_chroma_mc8 type, codec=h264
... ...
@@ -18,7 +18,7 @@
18 18
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 19
  */
20 20
 
21
-#include "asm.S"
21
+#include "libavutil/arm/asm.S"
22 22
 #include "neon.S"
23 23
 
24 24
         /* H.264 loop filter */
... ...
@@ -18,7 +18,7 @@
18 18
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 19
  */
20 20
 
21
-#include "asm.S"
21
+#include "libavutil/arm/asm.S"
22 22
 
23 23
         preserve8
24 24
 
... ...
@@ -18,7 +18,7 @@
18 18
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 19
  */
20 20
 
21
-#include "asm.S"
21
+#include "libavutil/arm/asm.S"
22 22
 
23 23
         .macro ldcol.8  rd,  rs,  rt,  n=8,  hi=0
24 24
 .if \n == 8 || \hi == 0
... ...
@@ -19,7 +19,7 @@
19 19
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 20
  */
21 21
 
22
-#include "asm.S"
22
+#include "libavutil/arm/asm.S"
23 23
 
24 24
         preserve8
25 25
         .fpu neon
... ...
@@ -25,7 +25,7 @@
25 25
 
26 26
 */
27 27
 
28
-#include "asm.S"
28
+#include "libavutil/arm/asm.S"
29 29
 
30 30
 #define FIX_0_298631336 2446
31 31
 #define FIX_0_541196100 4433
... ...
@@ -18,7 +18,7 @@
18 18
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 19
  */
20 20
 
21
-#include "asm.S"
21
+#include "libavutil/arm/asm.S"
22 22
 
23 23
         preserve8
24 24
 
... ...
@@ -19,7 +19,7 @@
19 19
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 20
  */
21 21
 
22
-#include "asm.S"
22
+#include "libavutil/arm/asm.S"
23 23
 
24 24
         preserve8
25 25
 
... ...
@@ -18,7 +18,7 @@
18 18
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 19
  */
20 20
 
21
-#include "asm.S"
21
+#include "libavutil/arm/asm.S"
22 22
 
23 23
 .macro  skip            args:vararg
24 24
 .endm
... ...
@@ -20,7 +20,7 @@
20 20
  */
21 21
 
22 22
 #include "config.h"
23
-#include "asm.S"
23
+#include "libavutil/arm/asm.S"
24 24
 
25 25
 /*
26 26
  * Special optimized version of dct_unquantize_h263_helper_c, it
... ...
@@ -18,7 +18,7 @@
18 18
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 19
  */
20 20
 
21
-#include "asm.S"
21
+#include "libavutil/arm/asm.S"
22 22
 #include "asm-offsets.h"
23 23
 
24 24
 function ff_dct_unquantize_h263_inter_neon, export=1
... ...
@@ -19,7 +19,7 @@
19 19
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 20
  */
21 21
 
22
-#include "asm.S"
22
+#include "libavutil/arm/asm.S"
23 23
 
24 24
         preserve8
25 25
 
... ...
@@ -18,7 +18,7 @@
18 18
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 19
  */
20 20
 
21
-#include "asm.S"
21
+#include "libavutil/arm/asm.S"
22 22
 #include "neon.S"
23 23
 
24 24
 .macro rv34_inv_transform    r0
... ...
@@ -19,7 +19,7 @@
19 19
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 20
  */
21 21
 
22
-#include "asm.S"
22
+#include "libavutil/arm/asm.S"
23 23
 #include "neon.S"
24 24
 
25 25
 .macro  qpel_lowpass    r0,  r1,  rc1, rc2, shift
... ...
@@ -18,7 +18,7 @@
18 18
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 19
  */
20 20
 
21
-#include "asm.S"
21
+#include "libavutil/arm/asm.S"
22 22
 
23 23
 function ff_sbr_sum64x5_neon, export=1
24 24
         push            {lr}
... ...
@@ -23,7 +23,7 @@
23 23
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 24
  */
25 25
 
26
-#include "asm.S"
26
+#include "libavutil/arm/asm.S"
27 27
 
28 28
 /* useful constants for the algorithm, they are save in __constant_ptr__ at */
29 29
 /* the end of the source code.*/
... ...
@@ -21,7 +21,7 @@
21 21
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 22
  */
23 23
 
24
-#include "asm.S"
24
+#include "libavutil/arm/asm.S"
25 25
 
26 26
 #define W1  22725   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
27 27
 #define W2  21407   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
... ...
@@ -21,7 +21,7 @@
21 21
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 22
  */
23 23
 
24
-#include "asm.S"
24
+#include "libavutil/arm/asm.S"
25 25
 
26 26
 #define W1  22725   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
27 27
 #define W2  21407   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
... ...
@@ -23,7 +23,7 @@
23 23
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 24
  */
25 25
 
26
-#include "asm.S"
26
+#include "libavutil/arm/asm.S"
27 27
 
28 28
 #define W1  22725  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
29 29
 #define W2  21407  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
... ...
@@ -18,7 +18,7 @@
18 18
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 19
  */
20 20
 
21
-#include "asm.S"
21
+#include "libavutil/arm/asm.S"
22 22
 
23 23
         preserve8
24 24
 
... ...
@@ -18,7 +18,7 @@
18 18
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 19
  */
20 20
 
21
-#include "asm.S"
21
+#include "libavutil/arm/asm.S"
22 22
 
23 23
 const   vp3_idct_constants, align=4
24 24
 .short 64277, 60547, 54491, 46341, 36410, 25080, 12785
... ...
@@ -18,7 +18,7 @@
18 18
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 19
  */
20 20
 
21
-#include "asm.S"
21
+#include "libavutil/arm/asm.S"
22 22
 
23 23
 .macro  vp6_edge_filter
24 24
         vdup.16         q3,  r2                 @ t
... ...
@@ -18,7 +18,7 @@
18 18
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 19
  */
20 20
 
21
-#include "asm.S"
21
+#include "libavutil/arm/asm.S"
22 22
 
23 23
 .macro rac_get_prob     h, bs, buf, cw, pr, t0, t1
24 24
         adds            \bs, \bs, \t0
... ...
@@ -52,7 +52,7 @@
52 52
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
53 53
  */
54 54
 
55
-#include "asm.S"
55
+#include "libavutil/arm/asm.S"
56 56
 
57 57
 @ idct
58 58
 
... ...
@@ -21,7 +21,7 @@
21 21
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 22
  */
23 23
 
24
-#include "asm.S"
24
+#include "libavutil/arm/asm.S"
25 25
 #include "neon.S"
26 26
 
27 27
 function ff_vp8_luma_dc_wht_neon, export=1
... ...
@@ -36,9 +36,9 @@
36 36
 #include <stddef.h>
37 37
 #include <stdio.h>
38 38
 
39
+#include "libavutil/float_dsp.h"
39 40
 #include "avcodec.h"
40 41
 #include "get_bits.h"
41
-#include "dsputil.h"
42 42
 #include "bytestream.h"
43 43
 #include "fft.h"
44 44
 #include "fmtconvert.h"
... ...
@@ -125,13 +125,13 @@ typedef struct {
125 125
 
126 126
     FFTContext          mdct_ctx;
127 127
     FmtConvertContext   fmt_conv;
128
+    AVFloatDSPContext   fdsp;
128 129
 } ATRAC3Context;
129 130
 
130 131
 static DECLARE_ALIGNED(32, float, mdct_window)[MDCT_SIZE];
131 132
 static VLC              spectral_coeff_tab[7];
132 133
 static float            gain_tab1[16];
133 134
 static float            gain_tab2[31];
134
-static DSPContext       dsp;
135 135
 
136 136
 
137 137
 /**
... ...
@@ -164,7 +164,7 @@ static void IMLT(ATRAC3Context *q, float *pInput, float *pOutput, int odd_band)
164 164
     q->mdct_ctx.imdct_calc(&q->mdct_ctx,pOutput,pInput);
165 165
 
166 166
     /* Perform windowing on the output. */
167
-    dsp.vector_fmul(pOutput, pOutput, mdct_window, MDCT_SIZE);
167
+    q->fdsp.vector_fmul(pOutput, pOutput, mdct_window, MDCT_SIZE);
168 168
 
169 169
 }
170 170
 
... ...
@@ -1039,7 +1039,7 @@ static av_cold int atrac3_decode_init(AVCodecContext *avctx)
1039 1039
         q->matrix_coeff_index_next[i] = 3;
1040 1040
     }
1041 1041
 
1042
-    ff_dsputil_init(&dsp, avctx);
1042
+    avpriv_float_dsp_init(&q->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
1043 1043
     ff_fmt_convert_init(&q->fmt_conv, avctx);
1044 1044
 
1045 1045
     q->pUnits = av_mallocz(sizeof(channel_unit)*q->channels);
... ...
@@ -2471,12 +2471,6 @@ WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
2471 2471
 WRAPPER8_16_SQ(rd8x8_c, rd16_c)
2472 2472
 WRAPPER8_16_SQ(bit8x8_c, bit16_c)
2473 2473
 
2474
-static void vector_fmul_c(float *dst, const float *src0, const float *src1, int len){
2475
-    int i;
2476
-    for(i=0; i<len; i++)
2477
-        dst[i] = src0[i] * src1[i];
2478
-}
2479
-
2480 2474
 static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
2481 2475
     int i;
2482 2476
     src1 += len-1;
... ...
@@ -3054,7 +3048,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
3054 3054
 #if CONFIG_AC3_DECODER
3055 3055
     c->ac3_downmix = ff_ac3_downmix_c;
3056 3056
 #endif
3057
-    c->vector_fmul = vector_fmul_c;
3058 3057
     c->vector_fmul_reverse = vector_fmul_reverse_c;
3059 3058
     c->vector_fmul_add = vector_fmul_add_c;
3060 3059
     c->vector_fmul_window = vector_fmul_window_c;
... ...
@@ -403,7 +403,6 @@ typedef struct DSPContext {
403 403
     void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
404 404
     void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);
405 405
     /* assume len is a multiple of 16, and arrays are 32-byte aligned */
406
-    void (*vector_fmul)(float *dst, const float *src0, const float *src1, int len);
407 406
     void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len);
408 407
     /* assume len is a multiple of 8, and src arrays are 16-byte aligned */
409 408
     void (*vector_fmul_add)(float *dst, const float *src0, const float *src1, const float *src2, int len);
... ...
@@ -35,6 +35,7 @@
35 35
  * http://wiki.multimedia.cx/index.php?title=Nellymoser
36 36
  */
37 37
 
38
+#include "libavutil/float_dsp.h"
38 39
 #include "libavutil/mathematics.h"
39 40
 #include "nellymoser.h"
40 41
 #include "avcodec.h"
... ...
@@ -55,6 +56,7 @@ typedef struct NellyMoserEncodeContext {
55 55
     AVCodecContext  *avctx;
56 56
     int             last_frame;
57 57
     DSPContext      dsp;
58
+    AVFloatDSPContext fdsp;
58 59
     FFTContext      mdct_ctx;
59 60
     AudioFrameQueue afq;
60 61
     DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
... ...
@@ -120,11 +122,11 @@ static void apply_mdct(NellyMoserEncodeContext *s)
120 120
     float *in1 = s->buf + NELLY_BUF_LEN;
121 121
     float *in2 = s->buf + 2 * NELLY_BUF_LEN;
122 122
 
123
-    s->dsp.vector_fmul        (s->in_buff,                 in0, ff_sine_128, NELLY_BUF_LEN);
123
+    s->fdsp.vector_fmul       (s->in_buff,                 in0, ff_sine_128, NELLY_BUF_LEN);
124 124
     s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN);
125 125
     s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
126 126
 
127
-    s->dsp.vector_fmul        (s->in_buff,                 in1, ff_sine_128, NELLY_BUF_LEN);
127
+    s->fdsp.vector_fmul       (s->in_buff,                 in1, ff_sine_128, NELLY_BUF_LEN);
128 128
     s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN);
129 129
     s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->in_buff);
130 130
 }
... ...
@@ -172,6 +174,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
172 172
     if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
173 173
         goto error;
174 174
     ff_dsputil_init(&s->dsp, avctx);
175
+    avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
175 176
 
176 177
     /* Generate overlap window */
177 178
     ff_init_ff_sine_windows(7);
... ...
@@ -24,9 +24,9 @@
24 24
 #if HAVE_ALTIVEC_H
25 25
 #include <altivec.h>
26 26
 #endif
27
+#include "libavutil/ppc/types_altivec.h"
28
+#include "libavutil/ppc/util_altivec.h"
27 29
 #include "libavcodec/dsputil.h"
28
-#include "util_altivec.h"
29
-#include "types_altivec.h"
30 30
 #include "dsputil_altivec.h"
31 31
 
32 32
 static int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
... ...
@@ -19,9 +19,10 @@
19 19
  * License along with FFmpeg; if not, write to the Free Software
20 20
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 21
  */
22
+
23
+#include "libavutil/ppc/types_altivec.h"
24
+#include "libavutil/ppc/util_altivec.h"
22 25
 #include "libavcodec/fft.h"
23
-#include "util_altivec.h"
24
-#include "types_altivec.h"
25 26
 
26 27
 /**
27 28
  * Do a complex FFT with the parameters defined in ff_fft_init(). The
... ...
@@ -18,25 +18,10 @@
18 18
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 19
  */
20 20
 
21
+#include "libavutil/ppc/util_altivec.h"
21 22
 #include "libavcodec/dsputil.h"
22 23
 
23 24
 #include "dsputil_altivec.h"
24
-#include "util_altivec.h"
25
-
26
-static void vector_fmul_altivec(float *dst, const float *src0, const float *src1, int len)
27
-{
28
-    int i;
29
-    vector float d0, d1, s, zero = (vector float)vec_splat_u32(0);
30
-    for(i=0; i<len-7; i+=8) {
31
-        d0 = vec_ld(0, src0+i);
32
-        s = vec_ld(0, src1+i);
33
-        d1 = vec_ld(16, src0+i);
34
-        d0 = vec_madd(d0, s, zero);
35
-        d1 = vec_madd(d1, vec_ld(16,src1+i), zero);
36
-        vec_st(d0, 0, dst+i);
37
-        vec_st(d1, 16, dst+i);
38
-    }
39
-}
40 25
 
41 26
 static void vector_fmul_reverse_altivec(float *dst, const float *src0,
42 27
                                         const float *src1, int len)
... ...
@@ -124,7 +109,6 @@ static void vector_fmul_window_altivec(float *dst, const float *src0, const floa
124 124
 
125 125
 void ff_float_init_altivec(DSPContext* c, AVCodecContext *avctx)
126 126
 {
127
-    c->vector_fmul = vector_fmul_altivec;
128 127
     c->vector_fmul_reverse = vector_fmul_reverse_altivec;
129 128
     c->vector_fmul_add = vector_fmul_add_altivec;
130 129
     if(!(avctx->flags & CODEC_FLAG_BITEXACT)) {
... ...
@@ -20,8 +20,8 @@
20 20
 
21 21
 #include "libavcodec/fmtconvert.h"
22 22
 
23
+#include "libavutil/ppc/util_altivec.h"
23 24
 #include "dsputil_altivec.h"
24
-#include "util_altivec.h"
25 25
 
26 26
 static void int32_to_float_fmul_scalar_altivec(float *dst, const int *src, float mul, int len)
27 27
 {
... ...
@@ -20,9 +20,9 @@
20 20
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 21
  */
22 22
 
23
+#include "libavutil/ppc/types_altivec.h"
24
+#include "libavutil/ppc/util_altivec.h"
23 25
 #include "libavcodec/dsputil.h"
24
-#include "util_altivec.h"
25
-#include "types_altivec.h"
26 26
 #include "dsputil_altivec.h"
27 27
 
28 28
 /*
... ...
@@ -19,13 +19,13 @@
19 19
  */
20 20
 
21 21
 #include "libavutil/cpu.h"
22
+#include "libavutil/ppc/types_altivec.h"
23
+#include "libavutil/ppc/util_altivec.h"
22 24
 #include "libavcodec/dsputil.h"
23 25
 #include "libavcodec/h264data.h"
24 26
 #include "libavcodec/h264dsp.h"
25 27
 
26 28
 #include "dsputil_altivec.h"
27
-#include "util_altivec.h"
28
-#include "types_altivec.h"
29 29
 
30 30
 #define PUT_OP_U8_ALTIVEC(d, s, dst) d = s
31 31
 #define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)
... ...
@@ -41,8 +41,8 @@
41 41
 #if HAVE_ALTIVEC_H
42 42
 #include <altivec.h>
43 43
 #endif
44
+#include "libavutil/ppc/types_altivec.h"
44 45
 #include "libavcodec/dsputil.h"
45
-#include "types_altivec.h"
46 46
 #include "dsputil_altivec.h"
47 47
 
48 48
 #define IDCT_HALF                                       \
... ...
@@ -28,12 +28,11 @@
28 28
 #include <altivec.h>
29 29
 #endif
30 30
 
31
+#include "libavutil/ppc/types_altivec.h"
31 32
 #include "libavcodec/dsputil.h"
32 33
 
33 34
 #include "dsputil_altivec.h"
34 35
 
35
-#include "types_altivec.h"
36
-
37 36
 static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2,
38 37
                                      int size) {
39 38
     int i, size16;
... ...
@@ -20,7 +20,7 @@
20 20
  */
21 21
 
22 22
 #include "dsputil_altivec.h"
23
-#include "util_altivec.h"
23
+#include "libavutil/ppc/util_altivec.h"
24 24
 #include "libavcodec/dsputil.h"
25 25
 #include "libavcodec/mpegaudiodsp.h"
26 26
 
... ...
@@ -23,12 +23,13 @@
23 23
 
24 24
 #include <stdlib.h>
25 25
 #include <stdio.h>
26
+
26 27
 #include "libavutil/cpu.h"
28
+#include "libavutil/ppc/types_altivec.h"
29
+#include "libavutil/ppc/util_altivec.h"
27 30
 #include "libavcodec/dsputil.h"
28 31
 #include "libavcodec/mpegvideo.h"
29 32
 
30
-#include "util_altivec.h"
31
-#include "types_altivec.h"
32 33
 #include "dsputil_altivec.h"
33 34
 
34 35
 /* AltiVec version of dct_unquantize_h263
35 36
deleted file mode 100644
... ...
@@ -1,47 +0,0 @@
1
-/*
2
- * Copyright (c) 2006 Guillaume Poirier <gpoirier@mplayerhq.hu>
3
- *
4
- * This file is part of FFmpeg.
5
- *
6
- * FFmpeg is free software; you can redistribute it and/or
7
- * modify it under the terms of the GNU Lesser General Public
8
- * License as published by the Free Software Foundation; either
9
- * version 2.1 of the License, or (at your option) any later version.
10
- *
11
- * FFmpeg is distributed in the hope that it will be useful,
12
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
- * Lesser General Public License for more details.
15
- *
16
- * You should have received a copy of the GNU Lesser General Public
17
- * License along with FFmpeg; if not, write to the Free Software
18
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
- */
20
-
21
-#ifndef AVCODEC_PPC_TYPES_ALTIVEC_H
22
-#define AVCODEC_PPC_TYPES_ALTIVEC_H
23
-
24
-/***********************************************************************
25
- * Vector types
26
- **********************************************************************/
27
-#define vec_u8  vector unsigned char
28
-#define vec_s8  vector signed char
29
-#define vec_u16 vector unsigned short
30
-#define vec_s16 vector signed short
31
-#define vec_u32 vector unsigned int
32
-#define vec_s32 vector signed int
33
-#define vec_f   vector float
34
-
35
-/***********************************************************************
36
- * Null vector
37
- **********************************************************************/
38
-#define LOAD_ZERO const vec_u8 zerov = vec_splat_u8( 0 )
39
-
40
-#define zero_u8v  (vec_u8)  zerov
41
-#define zero_s8v  (vec_s8)  zerov
42
-#define zero_u16v (vec_u16) zerov
43
-#define zero_s16v (vec_s16) zerov
44
-#define zero_u32v (vec_u32) zerov
45
-#define zero_s32v (vec_s32) zerov
46
-
47
-#endif /* AVCODEC_PPC_TYPES_ALTIVEC_H */
48 1
deleted file mode 100644
... ...
@@ -1,118 +0,0 @@
1
-/*
2
- * This file is part of FFmpeg.
3
- *
4
- * FFmpeg is free software; you can redistribute it and/or
5
- * modify it under the terms of the GNU Lesser General Public
6
- * License as published by the Free Software Foundation; either
7
- * version 2.1 of the License, or (at your option) any later version.
8
- *
9
- * FFmpeg is distributed in the hope that it will be useful,
10
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
- * Lesser General Public License for more details.
13
- *
14
- * You should have received a copy of the GNU Lesser General Public
15
- * License along with FFmpeg; if not, write to the Free Software
16
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
- */
18
-
19
-/**
20
- * @file
21
- * Contains misc utility macros and inline functions
22
- */
23
-
24
-#ifndef AVCODEC_PPC_UTIL_ALTIVEC_H
25
-#define AVCODEC_PPC_UTIL_ALTIVEC_H
26
-
27
-#include <stdint.h>
28
-
29
-#include "config.h"
30
-
31
-#if HAVE_ALTIVEC_H
32
-#include <altivec.h>
33
-#endif
34
-
35
-#include "types_altivec.h"
36
-
37
-// used to build registers permutation vectors (vcprm)
38
-// the 's' are for words in the _s_econd vector
39
-#define WORD_0 0x00,0x01,0x02,0x03
40
-#define WORD_1 0x04,0x05,0x06,0x07
41
-#define WORD_2 0x08,0x09,0x0a,0x0b
42
-#define WORD_3 0x0c,0x0d,0x0e,0x0f
43
-#define WORD_s0 0x10,0x11,0x12,0x13
44
-#define WORD_s1 0x14,0x15,0x16,0x17
45
-#define WORD_s2 0x18,0x19,0x1a,0x1b
46
-#define WORD_s3 0x1c,0x1d,0x1e,0x1f
47
-
48
-#define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d}
49
-#define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d}
50
-
51
-// vcprmle is used to keep the same index as in the SSE version.
52
-// it's the same as vcprm, with the index inversed
53
-// ('le' is Little Endian)
54
-#define vcprmle(a,b,c,d) vcprm(d,c,b,a)
55
-
56
-// used to build inverse/identity vectors (vcii)
57
-// n is _n_egative, p is _p_ositive
58
-#define FLOAT_n -1.
59
-#define FLOAT_p 1.
60
-
61
-
62
-// Transpose 8x8 matrix of 16-bit elements (in-place)
63
-#define TRANSPOSE8(a,b,c,d,e,f,g,h) \
64
-do { \
65
-    vector signed short A1, B1, C1, D1, E1, F1, G1, H1; \
66
-    vector signed short A2, B2, C2, D2, E2, F2, G2, H2; \
67
- \
68
-    A1 = vec_mergeh (a, e); \
69
-    B1 = vec_mergel (a, e); \
70
-    C1 = vec_mergeh (b, f); \
71
-    D1 = vec_mergel (b, f); \
72
-    E1 = vec_mergeh (c, g); \
73
-    F1 = vec_mergel (c, g); \
74
-    G1 = vec_mergeh (d, h); \
75
-    H1 = vec_mergel (d, h); \
76
- \
77
-    A2 = vec_mergeh (A1, E1); \
78
-    B2 = vec_mergel (A1, E1); \
79
-    C2 = vec_mergeh (B1, F1); \
80
-    D2 = vec_mergel (B1, F1); \
81
-    E2 = vec_mergeh (C1, G1); \
82
-    F2 = vec_mergel (C1, G1); \
83
-    G2 = vec_mergeh (D1, H1); \
84
-    H2 = vec_mergel (D1, H1); \
85
- \
86
-    a = vec_mergeh (A2, E2); \
87
-    b = vec_mergel (A2, E2); \
88
-    c = vec_mergeh (B2, F2); \
89
-    d = vec_mergel (B2, F2); \
90
-    e = vec_mergeh (C2, G2); \
91
-    f = vec_mergel (C2, G2); \
92
-    g = vec_mergeh (D2, H2); \
93
-    h = vec_mergel (D2, H2); \
94
-} while (0)
95
-
96
-
97
-/** @brief loads unaligned vector @a *src with offset @a offset
98
-    and returns it */
99
-static inline vector unsigned char unaligned_load(int offset, uint8_t *src)
100
-{
101
-    register vector unsigned char first = vec_ld(offset, src);
102
-    register vector unsigned char second = vec_ld(offset+15, src);
103
-    register vector unsigned char mask = vec_lvsl(offset, src);
104
-    return vec_perm(first, second, mask);
105
-}
106
-
107
-/**
108
- * loads vector known misalignment
109
- * @param perm_vec the align permute vector to combine the two loads from lvsl
110
- */
111
-static inline vec_u8 load_with_perm_vec(int offset, uint8_t *src, vec_u8 perm_vec)
112
-{
113
-    vec_u8 a = vec_ld(offset, src);
114
-    vec_u8 b = vec_ld(offset+15, src);
115
-    return vec_perm(a, b, perm_vec);
116
-}
117
-
118
-#endif /* AVCODEC_PPC_UTIL_ALTIVEC_H */
... ...
@@ -19,12 +19,11 @@
19 19
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 20
  */
21 21
 
22
+#include "libavutil/ppc/types_altivec.h"
23
+#include "libavutil/ppc/util_altivec.h"
22 24
 #include "libavcodec/dsputil.h"
23 25
 #include "libavcodec/vc1dsp.h"
24 26
 
25
-#include "util_altivec.h"
26
-#include "dsputil_altivec.h"
27
-
28 27
 // main steps of 8x8 transform
29 28
 #define STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_rnd) \
30 29
 do { \
... ...
@@ -18,9 +18,9 @@
18 18
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 19
  */
20 20
 
21
+#include "libavutil/ppc/types_altivec.h"
22
+#include "libavutil/ppc/util_altivec.h"
21 23
 #include "libavcodec/dsputil.h"
22
-#include "util_altivec.h"
23
-#include "types_altivec.h"
24 24
 #include "dsputil_altivec.h"
25 25
 
26 26
 static const vec_s16 constants =
... ...
@@ -21,10 +21,10 @@
21 21
  */
22 22
 
23 23
 #include "libavutil/cpu.h"
24
+#include "libavutil/ppc/types_altivec.h"
25
+#include "libavutil/ppc/util_altivec.h"
24 26
 #include "libavcodec/vp8dsp.h"
25 27
 #include "dsputil_altivec.h"
26
-#include "types_altivec.h"
27
-#include "util_altivec.h"
28 28
 
29 29
 #define REPT4(...) { __VA_ARGS__, __VA_ARGS__, __VA_ARGS__, __VA_ARGS__ }
30 30
 
... ...
@@ -19,6 +19,7 @@
19 19
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 20
  */
21 21
 
22
+#include "libavutil/float_dsp.h"
22 23
 #include "avcodec.h"
23 24
 #define BITSTREAM_READER_LE
24 25
 #include "get_bits.h"
... ...
@@ -26,7 +27,6 @@
26 26
 #include "lpc.h"
27 27
 #include "celp_math.h"
28 28
 #include "celp_filters.h"
29
-#include "dsputil.h"
30 29
 
31 30
 #define MAX_BACKWARD_FILTER_ORDER  36
32 31
 #define MAX_BACKWARD_FILTER_LEN    40
... ...
@@ -38,6 +38,7 @@
38 38
 typedef struct {
39 39
     AVFrame frame;
40 40
     DSPContext dsp;
41
+    AVFloatDSPContext fdsp;
41 42
     DECLARE_ALIGNED(32, float,   sp_lpc)[FFALIGN(36, 16)];   ///< LPC coefficients for speech data (spec: A)
42 43
     DECLARE_ALIGNED(32, float, gain_lpc)[FFALIGN(10, 16)];   ///< LPC coefficients for gain        (spec: GB)
43 44
 
... ...
@@ -62,7 +63,7 @@ static av_cold int ra288_decode_init(AVCodecContext *avctx)
62 62
 {
63 63
     RA288Context *ractx = avctx->priv_data;
64 64
     avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
65
-    ff_dsputil_init(&ractx->dsp, avctx);
65
+    avpriv_float_dsp_init(&ractx->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
66 66
 
67 67
     avcodec_get_frame_defaults(&ractx->frame);
68 68
     avctx->coded_frame = &ractx->frame;
... ...
@@ -137,7 +138,7 @@ static void do_hybrid_window(RA288Context *ractx,
137 137
                                             MAX_BACKWARD_FILTER_LEN   +
138 138
                                             MAX_BACKWARD_FILTER_NONREC, 16)]);
139 139
 
140
-    ractx->dsp.vector_fmul(work, window, hist, FFALIGN(order + n + non_rec, 16));
140
+    ractx->fdsp.vector_fmul(work, window, hist, FFALIGN(order + n + non_rec, 16));
141 141
 
142 142
     convolve(buffer1, work + order    , n      , order);
143 143
     convolve(buffer2, work + order + n, non_rec, order);
... ...
@@ -164,7 +165,7 @@ static void backward_filter(RA288Context *ractx,
164 164
     do_hybrid_window(ractx, order, n, non_rec, temp, hist, rec, window);
165 165
 
166 166
     if (!compute_lpc_coefs(temp, order, lpc, 0, 1, 1))
167
-        ractx->dsp.vector_fmul(lpc, lpc, tab, FFALIGN(order, 16));
167
+        ractx->fdsp.vector_fmul(lpc, lpc, tab, FFALIGN(order, 16));
168 168
 
169 169
     memmove(hist, hist + n, move_size*sizeof(*hist));
170 170
 }
... ...
@@ -19,6 +19,7 @@
19 19
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 20
  */
21 21
 
22
+#include "libavutil/float_dsp.h"
22 23
 #include "avcodec.h"
23 24
 #include "get_bits.h"
24 25
 #include "dsputil.h"
... ...
@@ -176,6 +177,7 @@ typedef struct TwinContext {
176 176
     AVCodecContext *avctx;
177 177
     AVFrame frame;
178 178
     DSPContext      dsp;
179
+    AVFloatDSPContext fdsp;
179 180
     FFTContext mdct_ctx[3];
180 181
 
181 182
     const ModeTab *mtab;
... ...
@@ -787,8 +789,8 @@ static void read_and_decode_spectrum(TwinContext *tctx, GetBitContext *gb,
787 787
             dec_bark_env(tctx, bark1[i][j], bark_use_hist[i][j], i,
788 788
                          tctx->tmp_buf, gain[sub*i+j], ftype);
789 789
 
790
-            tctx->dsp.vector_fmul(chunk + block_size*j, chunk + block_size*j, tctx->tmp_buf,
791
-                                  block_size);
790
+            tctx->fdsp.vector_fmul(chunk + block_size*j, chunk + block_size*j,
791
+                                   tctx->tmp_buf, block_size);
792 792
 
793 793
         }
794 794
 
... ...
@@ -809,7 +811,7 @@ static void read_and_decode_spectrum(TwinContext *tctx, GetBitContext *gb,
809 809
         dec_lpc_spectrum_inv(tctx, lsp, ftype, tctx->tmp_buf);
810 810
 
811 811
         for (j = 0; j < mtab->fmode[ftype].sub; j++) {
812
-            tctx->dsp.vector_fmul(chunk, chunk, tctx->tmp_buf, block_size);
812
+            tctx->fdsp.vector_fmul(chunk, chunk, tctx->tmp_buf, block_size);
813 813
             chunk += block_size;
814 814
         }
815 815
     }
... ...
@@ -1156,6 +1158,7 @@ static av_cold int twin_decode_init(AVCodecContext *avctx)
1156 1156
     }
1157 1157
 
1158 1158
     ff_dsputil_init(&tctx->dsp, avctx);
1159
+    avpriv_float_dsp_init(&tctx->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
1159 1160
     if ((ret = init_mdct_win(tctx))) {
1160 1161
         av_log(avctx, AV_LOG_ERROR, "Error initializing MDCT\n");
1161 1162
         twin_decode_close(avctx);
... ...
@@ -561,7 +561,7 @@ static av_always_inline int vc1_mspel_filter(const uint8_t *src, int stride, int
561 561
 /** Function used to do motion compensation with bicubic interpolation
562 562
  */
563 563
 #define VC1_MSPEL_MC(OP, OPNAME)\
564
-static void OPNAME ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, int hmode, int vmode, int rnd)\
564
+static av_always_inline void OPNAME ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, int hmode, int vmode, int rnd)\
565 565
 {\
566 566
     int     i, j;\
567 567
 \
... ...
@@ -30,6 +30,7 @@
30 30
 #include <math.h>
31 31
 
32 32
 #define BITSTREAM_READER_LE
33
+#include "libavutil/float_dsp.h"
33 34
 #include "avcodec.h"
34 35
 #include "get_bits.h"
35 36
 #include "dsputil.h"
... ...
@@ -128,6 +129,7 @@ typedef struct vorbis_context_s {
128 128
     AVFrame frame;
129 129
     GetBitContext gb;
130 130
     DSPContext dsp;
131
+    AVFloatDSPContext fdsp;
131 132
     FmtConvertContext fmt_conv;
132 133
 
133 134
     FFTContext mdct[2];
... ...
@@ -987,6 +989,7 @@ static av_cold int vorbis_decode_init(AVCodecContext *avccontext)
987 987
 
988 988
     vc->avccontext = avccontext;
989 989
     ff_dsputil_init(&vc->dsp, avccontext);
990
+    avpriv_float_dsp_init(&vc->fdsp, avccontext->flags & CODEC_FLAG_BITEXACT);
990 991
     ff_fmt_convert_init(&vc->fmt_conv, avccontext);
991 992
 
992 993
     if (avccontext->request_sample_fmt == AV_SAMPLE_FMT_FLT) {
... ...
@@ -1609,7 +1612,7 @@ static int vorbis_parse_audio_packet(vorbis_context *vc)
1609 1609
     for (j = vc->audio_channels-1;j >= 0; j--) {
1610 1610
         ch_floor_ptr = vc->channel_floors   + j           * blocksize / 2;
1611 1611
         ch_res_ptr   = vc->channel_residues + res_chan[j] * blocksize / 2;
1612
-        vc->dsp.vector_fmul(ch_floor_ptr, ch_floor_ptr, ch_res_ptr, blocksize / 2);
1612
+        vc->fdsp.vector_fmul(ch_floor_ptr, ch_floor_ptr, ch_res_ptr, blocksize / 2);
1613 1613
         mdct->imdct_half(mdct, ch_res_ptr, ch_floor_ptr);
1614 1614
     }
1615 1615
 
... ...
@@ -2581,11 +2581,6 @@ int  ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src,
2581 2581
 
2582 2582
 float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
2583 2583
 
2584
-void ff_vector_fmul_sse(float *dst, const float *src0, const float *src1,
2585
-                        int len);
2586
-void ff_vector_fmul_avx(float *dst, const float *src0, const float *src1,
2587
-                        int len);
2588
-
2589 2584
 void ff_vector_fmul_reverse_sse(float *dst, const float *src0,
2590 2585
                                 const float *src1, int len);
2591 2586
 void ff_vector_fmul_reverse_avx(float *dst, const float *src0,
... ...
@@ -2915,7 +2910,6 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags)
2915 2915
     c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
2916 2916
     c->ac3_downmix             = ac3_downmix_sse;
2917 2917
 #if HAVE_YASM
2918
-    c->vector_fmul         = ff_vector_fmul_sse;
2919 2918
     c->vector_fmul_reverse = ff_vector_fmul_reverse_sse;
2920 2919
     c->vector_fmul_add     = ff_vector_fmul_add_sse;
2921 2920
 #endif
... ...
@@ -3077,7 +3071,6 @@ static void dsputil_init_avx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
3077 3077
         }
3078 3078
     }
3079 3079
     c->butterflies_float_interleave = ff_butterflies_float_interleave_avx;
3080
-    c->vector_fmul = ff_vector_fmul_avx;
3081 3080
     c->vector_fmul_reverse = ff_vector_fmul_reverse_avx;
3082 3081
     c->vector_fmul_add = ff_vector_fmul_add_avx;
3083 3082
 #endif
... ...
@@ -1130,38 +1130,6 @@ VECTOR_CLIP_INT32 6, 1, 0, 0
1130 1130
 %endif
1131 1131
 
1132 1132
 ;-----------------------------------------------------------------------------
1133
-; void vector_fmul(float *dst, const float *src0, const float *src1, int len)
1134
-;-----------------------------------------------------------------------------
1135
-%macro VECTOR_FMUL 0
1136
-cglobal vector_fmul, 4,4,2, dst, src0, src1, len
1137
-    lea       lenq, [lend*4 - 2*mmsize]
1138
-ALIGN 16
1139
-.loop
1140
-    mova      m0,   [src0q + lenq]
1141
-    mova      m1,   [src0q + lenq + mmsize]
1142
-    mulps     m0, m0, [src1q + lenq]
1143
-    mulps     m1, m1, [src1q + lenq + mmsize]
1144
-    mova      [dstq + lenq], m0
1145
-    mova      [dstq + lenq + mmsize], m1
1146
-
1147
-    sub       lenq, 2*mmsize
1148
-    jge       .loop
1149
-%if mmsize == 32
1150
-    vzeroupper
1151
-    RET
1152
-%else
1153
-    REP_RET
1154
-%endif
1155
-%endmacro
1156
-
1157
-INIT_XMM sse
1158
-VECTOR_FMUL
1159
-%if HAVE_AVX
1160
-INIT_YMM avx
1161
-VECTOR_FMUL
1162
-%endif
1163
-
1164
-;-----------------------------------------------------------------------------
1165 1133
 ; void vector_fmul_reverse(float *dst, const float *src0, const float *src1,
1166 1134
 ;                          int len)
1167 1135
 ;-----------------------------------------------------------------------------
... ...
@@ -63,6 +63,7 @@ OBJS = adler32.o                                                        \
63 63
        eval.o                                                           \
64 64
        fifo.o                                                           \
65 65
        file.o                                                           \
66
+       float_dsp.o                                                      \
66 67
        imgutils.o                                                       \
67 68
        intfloat_readwrite.o                                             \
68 69
        inverse.o                                                        \
... ...
@@ -1 +1,8 @@
1 1
 OBJS += arm/cpu.o                                                       \
2
+        arm/float_dsp_init_arm.o                                        \
3
+
4
+ARMVFP-OBJS += arm/float_dsp_init_vfp.o                                 \
5
+               arm/float_dsp_vfp.o                                      \
6
+
7
+NEON-OBJS += arm/float_dsp_init_neon.o                                  \
8
+             arm/float_dsp_neon.o                                       \
2 9
new file mode 100644
... ...
@@ -0,0 +1,231 @@
0
+/*
1
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
2
+ *
3
+ * This file is part of FFmpeg.
4
+ *
5
+ * FFmpeg is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * FFmpeg is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with FFmpeg; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#include "config.h"
21
+
22
+#ifdef __ELF__
23
+#   define ELF
24
+#else
25
+#   define ELF @
26
+#endif
27
+
28
+#if CONFIG_THUMB
29
+#   define A @
30
+#   define T
31
+#else
32
+#   define A
33
+#   define T @
34
+#endif
35
+
36
+#if   HAVE_NEON
37
+        .arch           armv7-a
38
+#elif HAVE_ARMV6T2
39
+        .arch           armv6t2
40
+#elif HAVE_ARMV6
41
+        .arch           armv6
42
+#elif HAVE_ARMV5TE
43
+        .arch           armv5te
44
+#endif
45
+
46
+#if   HAVE_NEON
47
+        .fpu            neon
48
+#elif HAVE_ARMVFP
49
+        .fpu            vfp
50
+#endif
51
+
52
+        .syntax unified
53
+T       .thumb
54
+
55
+.macro  require8 val=1
56
+ELF     .eabi_attribute 24, \val
57
+.endm
58
+
59
+.macro  preserve8 val=1
60
+ELF     .eabi_attribute 25, \val
61
+.endm
62
+
63
+.macro  function name, export=0
64
+    .macro endfunc
65
+ELF     .size   \name, . - \name
66
+        .endfunc
67
+        .purgem endfunc
68
+    .endm
69
+        .text
70
+        .align          2
71
+    .if \export
72
+        .global EXTERN_ASM\name
73
+EXTERN_ASM\name:
74
+    .endif
75
+ELF     .type   \name, %function
76
+        .func   \name
77
+\name:
78
+.endm
79
+
80
+.macro  const   name, align=2
81
+    .macro endconst
82
+ELF     .size   \name, . - \name
83
+        .purgem endconst
84
+    .endm
85
+        .section        .rodata
86
+        .align          \align
87
+\name:
88
+.endm
89
+
90
+#if !HAVE_ARMV6T2
91
+.macro  movw    rd, val
92
+        mov     \rd, \val &  255
93
+        orr     \rd, \val & ~255
94
+.endm
95
+#endif
96
+
97
+.macro  mov32   rd, val
98
+#if HAVE_ARMV6T2
99
+        movw            \rd, #(\val) & 0xffff
100
+    .if (\val) >> 16
101
+        movt            \rd, #(\val) >> 16
102
+    .endif
103
+#else
104
+        ldr             \rd, =\val
105
+#endif
106
+.endm
107
+
108
+.macro  movrel rd, val
109
+#if HAVE_ARMV6T2 && !CONFIG_PIC && !defined(__APPLE__)
110
+        movw            \rd, #:lower16:\val
111
+        movt            \rd, #:upper16:\val
112
+#else
113
+        ldr             \rd, =\val
114
+#endif
115
+.endm
116
+
117
+.macro  ldr_pre         rt,  rn,  rm:vararg
118
+A       ldr             \rt, [\rn, \rm]!
119
+T       add             \rn, \rn, \rm
120
+T       ldr             \rt, [\rn]
121
+.endm
122
+
123
+.macro  ldr_dpre        rt,  rn,  rm:vararg
124
+A       ldr             \rt, [\rn, -\rm]!
125
+T       sub             \rn, \rn, \rm
126
+T       ldr             \rt, [\rn]
127
+.endm
128
+
129
+.macro  ldr_nreg        rt,  rn,  rm:vararg
130
+A       ldr             \rt, [\rn, -\rm]
131
+T       sub             \rt, \rn, \rm
132
+T       ldr             \rt, [\rt]
133
+.endm
134
+
135
+.macro  ldr_post        rt,  rn,  rm:vararg
136
+A       ldr             \rt, [\rn], \rm
137
+T       ldr             \rt, [\rn]
138
+T       add             \rn, \rn, \rm
139
+.endm
140
+
141
+.macro  ldrd_reg        rt,  rt2, rn,  rm
142
+A       ldrd            \rt, \rt2, [\rn, \rm]
143
+T       add             \rt, \rn, \rm
144
+T       ldrd            \rt, \rt2, [\rt]
145
+.endm
146
+
147
+.macro  ldrd_post       rt,  rt2, rn,  rm
148
+A       ldrd            \rt, \rt2, [\rn], \rm
149
+T       ldrd            \rt, \rt2, [\rn]
150
+T       add             \rn, \rn, \rm
151
+.endm
152
+
153
+.macro  ldrh_pre        rt,  rn,  rm
154
+A       ldrh            \rt, [\rn, \rm]!
155
+T       add             \rn, \rn, \rm
156
+T       ldrh            \rt, [\rn]
157
+.endm
158
+
159
+.macro  ldrh_dpre       rt,  rn,  rm
160
+A       ldrh            \rt, [\rn, -\rm]!
161
+T       sub             \rn, \rn, \rm
162
+T       ldrh            \rt, [\rn]
163
+.endm
164
+
165
+.macro  ldrh_post       rt,  rn,  rm
166
+A       ldrh            \rt, [\rn], \rm
167
+T       ldrh            \rt, [\rn]
168
+T       add             \rn, \rn, \rm
169
+.endm
170
+
171
+.macro  ldrb_post       rt,  rn,  rm
172
+A       ldrb            \rt, [\rn], \rm
173
+T       ldrb            \rt, [\rn]
174
+T       add             \rn, \rn, \rm
175
+.endm
176
+
177
+.macro  str_post       rt,  rn,  rm:vararg
178
+A       str             \rt, [\rn], \rm
179
+T       str             \rt, [\rn]
180
+T       add             \rn, \rn, \rm
181
+.endm
182
+
183
+.macro  strb_post       rt,  rn,  rm:vararg
184
+A       strb            \rt, [\rn], \rm
185
+T       strb            \rt, [\rn]
186
+T       add             \rn, \rn, \rm
187
+.endm
188
+
189
+.macro  strd_post       rt,  rt2, rn,  rm
190
+A       strd            \rt, \rt2, [\rn], \rm
191
+T       strd            \rt, \rt2, [\rn]
192
+T       add             \rn, \rn, \rm
193
+.endm
194
+
195
+.macro  strh_pre        rt,  rn,  rm
196
+A       strh            \rt, [\rn, \rm]!
197
+T       add             \rn, \rn, \rm
198
+T       strh            \rt, [\rn]
199
+.endm
200
+
201
+.macro  strh_dpre       rt,  rn,  rm
202
+A       strh            \rt, [\rn, -\rm]!
203
+T       sub             \rn, \rn, \rm
204
+T       strh            \rt, [\rn]
205
+.endm
206
+
207
+.macro  strh_post       rt,  rn,  rm
208
+A       strh            \rt, [\rn], \rm
209
+T       strh            \rt, [\rn]
210
+T       add             \rn, \rn, \rm
211
+.endm
212
+
213
+.macro  strh_dpost       rt,  rn,  rm
214
+A       strh            \rt, [\rn], -\rm
215
+T       strh            \rt, [\rn]
216
+T       sub             \rn, \rn, \rm
217
+.endm
218
+
219
+#if HAVE_VFP_ARGS
220
+        .eabi_attribute 28, 1
221
+#   define VFP
222
+#   define NOVFP @
223
+#else
224
+#   define VFP   @
225
+#   define NOVFP
226
+#endif
227
+
228
+#define GLUE(a, b) a ## b
229
+#define JOIN(a, b) GLUE(a, b)
230
+#define X(s) JOIN(EXTERN_ASM, s)
0 231
new file mode 100644
... ...
@@ -0,0 +1,29 @@
0
+/*
1
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
2
+ *
3
+ * This file is part of FFmpeg.
4
+ *
5
+ * FFmpeg is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * FFmpeg is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with FFmpeg; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#ifndef AVUTIL_ARM_FLOAT_DSP_ARM_H
21
+#define AVUTIL_ARM_FLOAT_DSP_ARM_H
22
+
23
+#include "libavutil/float_dsp.h"
24
+
25
+void ff_float_dsp_init_vfp (AVFloatDSPContext *fdsp);
26
+void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp);
27
+
28
+#endif /* AVUTIL_ARM_FLOAT_DSP_ARM_H */
0 29
new file mode 100644
... ...
@@ -0,0 +1,33 @@
0
+/*
1
+ * ARM optimized DSP utils
2
+ *
3
+ * This file is part of FFmpeg.
4
+ *
5
+ * FFmpeg is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * FFmpeg is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with FFmpeg; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#include "libavutil/arm/cpu.h"
21
+#include "libavutil/float_dsp.h"
22
+#include "float_dsp_arm.h"
23
+
24
+void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp)
25
+{
26
+    int cpu_flags = av_get_cpu_flags();
27
+
28
+    if (have_vfp(cpu_flags))
29
+        ff_float_dsp_init_vfp(fdsp);
30
+    if (have_neon(cpu_flags))
31
+        ff_float_dsp_init_neon(fdsp);
32
+}
0 33
new file mode 100644
... ...
@@ -0,0 +1,32 @@
0
+/*
1
+ * ARM NEON optimised Float DSP functions
2
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
3
+ *
4
+ * This file is part of Libav.
5
+ *
6
+ * Libav is free software; you can redistribute it and/or
7
+ * modify it under the terms of the GNU Lesser General Public
8
+ * License as published by the Free Software Foundation; either
9
+ * version 2.1 of the License, or (at your option) any later version.
10
+ *
11
+ * Libav is distributed in the hope that it will be useful,
12
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
+ * Lesser General Public License for more details.
15
+ *
16
+ * You should have received a copy of the GNU Lesser General Public
17
+ * License along with Libav; if not, write to the Free Software
18
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
+ */
20
+
21
+#include <stdint.h>
22
+
23
+#include "libavutil/float_dsp.h"
24
+#include "float_dsp_arm.h"
25
+
26
+void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len);
27
+
28
+void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
29
+{
30
+    fdsp->vector_fmul = ff_vector_fmul_neon;
31
+}
0 32
new file mode 100644
... ...
@@ -0,0 +1,34 @@
0
+/*
1
+ * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net>
2
+ *
3
+ * This file is part of FFmpeg.
4
+ *
5
+ * FFmpeg is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * FFmpeg is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with FFmpeg; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#include "libavutil/arm/cpu.h"
21
+#include "libavutil/float_dsp.h"
22
+#include "float_dsp_arm.h"
23
+
24
+void ff_vector_fmul_vfp(float *dst, const float *src0, const float *src1,
25
+                        int len);
26
+
27
+void ff_float_dsp_init_vfp(AVFloatDSPContext *fdsp)
28
+{
29
+    int cpu_flags = av_get_cpu_flags();
30
+
31
+    if (!have_vfpv3(cpu_flags))
32
+        fdsp->vector_fmul = ff_vector_fmul_vfp;
33
+}
0 34
new file mode 100644
... ...
@@ -0,0 +1,64 @@
0
+/*
1
+ * ARM NEON optimised Float DSP functions
2
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
3
+ *
4
+ * This file is part of Libav.
5
+ *
6
+ * Libav is free software; you can redistribute it and/or
7
+ * modify it under the terms of the GNU Lesser General Public
8
+ * License as published by the Free Software Foundation; either
9
+ * version 2.1 of the License, or (at your option) any later version.
10
+ *
11
+ * Libav is distributed in the hope that it will be useful,
12
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
+ * Lesser General Public License for more details.
15
+ *
16
+ * You should have received a copy of the GNU Lesser General Public
17
+ * License along with Libav; if not, write to the Free Software
18
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
+ */
20
+
21
+#include "config.h"
22
+#include "asm.S"
23
+
24
+        preserve8
25
+
26
+function ff_vector_fmul_neon, export=1
27
+        subs            r3,  r3,  #8
28
+        vld1.32         {d0-d3},  [r1,:128]!
29
+        vld1.32         {d4-d7},  [r2,:128]!
30
+        vmul.f32        q8,  q0,  q2
31
+        vmul.f32        q9,  q1,  q3
32
+        beq             3f
33
+        bics            ip,  r3,  #15
34
+        beq             2f
35
+1:      subs            ip,  ip,  #16
36
+        vld1.32         {d0-d1},  [r1,:128]!
37
+        vld1.32         {d4-d5},  [r2,:128]!
38
+        vmul.f32        q10, q0,  q2
39
+        vld1.32         {d2-d3},  [r1,:128]!
40
+        vld1.32         {d6-d7},  [r2,:128]!
41
+        vmul.f32        q11, q1,  q3
42
+        vst1.32         {d16-d19},[r0,:128]!
43
+        vld1.32         {d0-d1},  [r1,:128]!
44
+        vld1.32         {d4-d5},  [r2,:128]!
45
+        vmul.f32        q8,  q0,  q2
46
+        vld1.32         {d2-d3},  [r1,:128]!
47
+        vld1.32         {d6-d7},  [r2,:128]!
48
+        vmul.f32        q9,  q1,  q3
49
+        vst1.32         {d20-d23},[r0,:128]!
50
+        bne             1b
51
+        ands            r3,  r3,  #15
52
+        beq             3f
53
+2:      vld1.32         {d0-d1},  [r1,:128]!
54
+        vld1.32         {d4-d5},  [r2,:128]!
55
+        vst1.32         {d16-d17},[r0,:128]!
56
+        vmul.f32        q8,  q0,  q2
57
+        vld1.32         {d2-d3},  [r1,:128]!
58
+        vld1.32         {d6-d7},  [r2,:128]!
59
+        vst1.32         {d18-d19},[r0,:128]!
60
+        vmul.f32        q9,  q1,  q3
61
+3:      vst1.32         {d16-d19},[r0,:128]!
62
+        bx              lr
63
+endfunc
0 64
new file mode 100644
... ...
@@ -0,0 +1,68 @@
0
+/*
1
+ * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net>
2
+ *
3
+ * This file is part of FFmpeg.
4
+ *
5
+ * FFmpeg is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * FFmpeg is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with FFmpeg; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#include "config.h"
21
+#include "asm.S"
22
+
23
+/**
24
+ * Assume that len is a positive number and is a multiple of 8.
25
+ */
26
+@ void ff_vector_fmul_vfp(float *dst, const float *src0, const float *src1, int len)
27
+function ff_vector_fmul_vfp, export=1
28
+        vpush           {d8-d15}
29
+        fmrx            r12, fpscr
30
+        orr             r12, r12, #(3 << 16) /* set vector size to 4 */
31
+        fmxr            fpscr, r12
32
+
33
+        vldmia          r1!, {s0-s3}
34
+        vldmia          r2!, {s8-s11}
35
+        vldmia          r1!, {s4-s7}
36
+        vldmia          r2!, {s12-s15}
37
+        vmul.f32        s8,  s0,  s8
38
+1:
39
+        subs            r3,  r3,  #16
40
+        vmul.f32        s12, s4,  s12
41
+        itttt           ge
42
+        vldmiage        r1!, {s16-s19}
43
+        vldmiage        r2!, {s24-s27}
44
+        vldmiage        r1!, {s20-s23}
45
+        vldmiage        r2!, {s28-s31}
46
+        it              ge
47
+        vmulge.f32      s24, s16, s24
48
+        vstmia          r0!, {s8-s11}
49
+        vstmia          r0!, {s12-s15}
50
+        it              ge
51
+        vmulge.f32      s28, s20, s28
52
+        itttt           gt
53
+        vldmiagt        r1!, {s0-s3}
54
+        vldmiagt        r2!, {s8-s11}
55
+        vldmiagt        r1!, {s4-s7}
56
+        vldmiagt        r2!, {s12-s15}
57
+        ittt            ge
58
+        vmulge.f32      s8,  s0,  s8
59
+        vstmiage        r0!, {s24-s27}
60
+        vstmiage        r0!, {s28-s31}
61
+        bgt             1b
62
+
63
+        bic             r12, r12, #(7 << 16) /* set vector size back to 1 */
64
+        fmxr            fpscr, r12
65
+        vpop            {d8-d15}
66
+        bx              lr
67
+endfunc
0 68
new file mode 100644
... ...
@@ -0,0 +1,42 @@
0
+/*
1
+ * This file is part of Libav.
2
+ *
3
+ * Libav is free software; you can redistribute it and/or
4
+ * modify it under the terms of the GNU Lesser General Public
5
+ * License as published by the Free Software Foundation; either
6
+ * version 2.1 of the License, or (at your option) any later version.
7
+ *
8
+ * Libav is distributed in the hope that it will be useful,
9
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
+ * Lesser General Public License for more details.
12
+ *
13
+ * You should have received a copy of the GNU Lesser General Public
14
+ * License along with Libav; if not, write to the Free Software
15
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+ */
17
+
18
+#include "config.h"
19
+
20
+#include "float_dsp.h"
21
+
22
+static void vector_fmul_c(float *dst, const float *src0, const float *src1,
23
+                          int len)
24
+{
25
+    int i;
26
+    for (i = 0; i < len; i++)
27
+        dst[i] = src0[i] * src1[i];
28
+}
29
+
30
+void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
31
+{
32
+    fdsp->vector_fmul = vector_fmul_c;
33
+
34
+#if ARCH_ARM
35
+    ff_float_dsp_init_arm(fdsp);
36
+#elif ARCH_PPC
37
+    ff_float_dsp_init_ppc(fdsp, bit_exact);
38
+#elif ARCH_X86
39
+    ff_float_dsp_init_x86(fdsp);
40
+#endif
41
+}
0 42
new file mode 100644
... ...
@@ -0,0 +1,53 @@
0
+/*
1
+ * This file is part of FFmpeg.
2
+ *
3
+ * FFmpeg is free software; you can redistribute it and/or
4
+ * modify it under the terms of the GNU Lesser General Public
5
+ * License as published by the Free Software Foundation; either
6
+ * version 2.1 of the License, or (at your option) any later version.
7
+ *
8
+ * FFmpeg is distributed in the hope that it will be useful,
9
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
+ * Lesser General Public License for more details.
12
+ *
13
+ * You should have received a copy of the GNU Lesser General Public
14
+ * License along with FFmpeg; if not, write to the Free Software
15
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+ */
17
+
18
+#ifndef AVUTIL_FLOAT_DSP_H
19
+#define AVUTIL_FLOAT_DSP_H
20
+
21
+typedef struct AVFloatDSPContext {
22
+    /**
23
+     * Calculate the product of two vectors of floats and store the result in
24
+     * a vector of floats.
25
+     *
26
+     * @param dst  output vector
27
+     *             constraints: 32-byte aligned
28
+     * @param src0 first input vector
29
+     *             constraints: 32-byte aligned
30
+     * @param src1 second input vector
31
+     *             constraints: 32-byte aligned
32
+     * @param len  number of elements in the input
33
+     *             constraints: multiple of 16
34
+     */
35
+    void (*vector_fmul)(float *dst, const float *src0, const float *src1,
36
+                        int len);
37
+} AVFloatDSPContext;
38
+
39
+/**
40
+ * Initialize a float DSP context.
41
+ *
42
+ * @param fdsp    float DSP context
43
+ * @param strict  setting to non-zero avoids using functions which may not be IEEE-754 compliant
44
+ */
45
+void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int strict);
46
+
47
+
48
+void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp);
49
+void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int strict);
50
+void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp);
51
+
52
+#endif /* AVUTIL_FLOAT_DSP_H */
... ...
@@ -1 +1,4 @@
1 1
 OBJS += ppc/cpu.o                                                       \
2
+        ppc/float_dsp_init.o                                            \
3
+
4
+ALTIVEC-OBJS += ppc/float_dsp_altivec.o                                 \
2 5
new file mode 100644
... ...
@@ -0,0 +1,38 @@
0
+/*
1
+ * Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org>
2
+ *
3
+ * This file is part of FFmpeg.
4
+ *
5
+ * FFmpeg is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * FFmpeg is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with FFmpeg; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#include "util_altivec.h"
21
+#include "float_dsp_altivec.h"
22
+
23
+void ff_vector_fmul_altivec(float *dst, const float *src0, const float *src1,
24
+                            int len)
25
+{
26
+    int i;
27
+    vector float d0, d1, s, zero = (vector float)vec_splat_u32(0);
28
+    for (i = 0; i < len - 7; i += 8) {
29
+        d0 = vec_ld( 0, src0 + i);
30
+        s  = vec_ld( 0, src1 + i);
31
+        d1 = vec_ld(16, src0 + i);
32
+        d0 = vec_madd(d0, s, zero);
33
+        d1 = vec_madd(d1, vec_ld(16, src1 + i), zero);
34
+        vec_st(d0,  0, dst + i);
35
+        vec_st(d1, 16, dst + i);
36
+    }
37
+}
0 38
new file mode 100644
... ...
@@ -0,0 +1,27 @@
0
+/*
1
+ * Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org>
2
+ *
3
+ * This file is part of FFmpeg.
4
+ *
5
+ * FFmpeg is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * FFmpeg is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with FFmpeg; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#ifndef AVUTIL_PPC_FLOAT_DSP_ALTIVEC_H
21
+#define AVUTIL_PPC_FLOAT_DSP_ALTIVEC_H
22
+
23
+extern void ff_vector_fmul_altivec(float *dst, const float *src0,
24
+                                   const float *src1, int len);
25
+
26
+#endif /* AVUTIL_PPC_FLOAT_DSP_ALTIVEC_H */
0 27
new file mode 100644
... ...
@@ -0,0 +1,36 @@
0
+/*
1
+ * Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org>
2
+ *
3
+ * This file is part of FFmpeg.
4
+ *
5
+ * FFmpeg is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * FFmpeg is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with FFmpeg; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#include "config.h"
21
+#include "libavutil/cpu.h"
22
+#include "libavutil/float_dsp.h"
23
+#include "float_dsp_altivec.h"
24
+
25
+void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int bit_exact)
26
+{
27
+#if HAVE_ALTIVEC
28
+    int mm_flags = av_get_cpu_flags();
29
+
30
+    if (!(mm_flags & AV_CPU_FLAG_ALTIVEC))
31
+        return;
32
+
33
+    fdsp->vector_fmul = ff_vector_fmul_altivec;
34
+#endif
35
+}
0 36
new file mode 100644
... ...
@@ -0,0 +1,47 @@
0
+/*
1
+ * Copyright (c) 2006 Guillaume Poirier <gpoirier@mplayerhq.hu>
2
+ *
3
+ * This file is part of FFmpeg.
4
+ *
5
+ * FFmpeg is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * FFmpeg is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with FFmpeg; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#ifndef AVUTIL_PPC_TYPES_ALTIVEC_H
21
+#define AVUTIL_PPC_TYPES_ALTIVEC_H
22
+
23
+/***********************************************************************
24
+ * Vector types
25
+ **********************************************************************/
26
+#define vec_u8  vector unsigned char
27
+#define vec_s8  vector signed char
28
+#define vec_u16 vector unsigned short
29
+#define vec_s16 vector signed short
30
+#define vec_u32 vector unsigned int
31
+#define vec_s32 vector signed int
32
+#define vec_f   vector float
33
+
34
+/***********************************************************************
35
+ * Null vector
36
+ **********************************************************************/
37
+#define LOAD_ZERO const vec_u8 zerov = vec_splat_u8( 0 )
38
+
39
+#define zero_u8v  (vec_u8)  zerov
40
+#define zero_s8v  (vec_s8)  zerov
41
+#define zero_u16v (vec_u16) zerov
42
+#define zero_s16v (vec_s16) zerov
43
+#define zero_u32v (vec_u32) zerov
44
+#define zero_s32v (vec_s32) zerov
45
+
46
+#endif /* AVUTIL_PPC_TYPES_ALTIVEC_H */
0 47
new file mode 100644
... ...
@@ -0,0 +1,118 @@
0
+/*
1
+ * This file is part of FFmpeg.
2
+ *
3
+ * FFmpeg is free software; you can redistribute it and/or
4
+ * modify it under the terms of the GNU Lesser General Public
5
+ * License as published by the Free Software Foundation; either
6
+ * version 2.1 of the License, or (at your option) any later version.
7
+ *
8
+ * FFmpeg is distributed in the hope that it will be useful,
9
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
+ * Lesser General Public License for more details.
12
+ *
13
+ * You should have received a copy of the GNU Lesser General Public
14
+ * License along with FFmpeg; if not, write to the Free Software
15
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+ */
17
+
18
+/**
19
+ * @file
20
+ * Contains misc utility macros and inline functions
21
+ */
22
+
23
+#ifndef AVUTIL_PPC_UTIL_ALTIVEC_H
24
+#define AVUTIL_PPC_UTIL_ALTIVEC_H
25
+
26
+#include <stdint.h>
27
+
28
+#include "config.h"
29
+
30
+#if HAVE_ALTIVEC_H
31
+#include <altivec.h>
32
+#endif
33
+
34
+#include "types_altivec.h"
35
+
36
+// used to build register permutation vectors (vcprm)
37
+// the 's' are for words in the _s_econd vector
38
+#define WORD_0 0x00,0x01,0x02,0x03
39
+#define WORD_1 0x04,0x05,0x06,0x07
40
+#define WORD_2 0x08,0x09,0x0a,0x0b
41
+#define WORD_3 0x0c,0x0d,0x0e,0x0f
42
+#define WORD_s0 0x10,0x11,0x12,0x13
43
+#define WORD_s1 0x14,0x15,0x16,0x17
44
+#define WORD_s2 0x18,0x19,0x1a,0x1b
45
+#define WORD_s3 0x1c,0x1d,0x1e,0x1f
46
+
47
+#define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d}
48
+#define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d}
49
+
50
+// vcprmle is used to keep the same index as in the SSE version.
51
+// it's the same as vcprm, with the indices reversed
52
+// ('le' is Little Endian)
53
+#define vcprmle(a,b,c,d) vcprm(d,c,b,a)
54
+
55
+// used to build inverse/identity vectors (vcii)
56
+// n is _n_egative, p is _p_ositive
57
+#define FLOAT_n -1.
58
+#define FLOAT_p 1.
59
+
60
+
61
+// Transpose 8x8 matrix of 16-bit elements (in-place)
62
+#define TRANSPOSE8(a,b,c,d,e,f,g,h) \
63
+do { \
64
+    vector signed short A1, B1, C1, D1, E1, F1, G1, H1; \
65
+    vector signed short A2, B2, C2, D2, E2, F2, G2, H2; \
66
+ \
67
+    A1 = vec_mergeh (a, e); \
68
+    B1 = vec_mergel (a, e); \
69
+    C1 = vec_mergeh (b, f); \
70
+    D1 = vec_mergel (b, f); \
71
+    E1 = vec_mergeh (c, g); \
72
+    F1 = vec_mergel (c, g); \
73
+    G1 = vec_mergeh (d, h); \
74
+    H1 = vec_mergel (d, h); \
75
+ \
76
+    A2 = vec_mergeh (A1, E1); \
77
+    B2 = vec_mergel (A1, E1); \
78
+    C2 = vec_mergeh (B1, F1); \
79
+    D2 = vec_mergel (B1, F1); \
80
+    E2 = vec_mergeh (C1, G1); \
81
+    F2 = vec_mergel (C1, G1); \
82
+    G2 = vec_mergeh (D1, H1); \
83
+    H2 = vec_mergel (D1, H1); \
84
+ \
85
+    a = vec_mergeh (A2, E2); \
86
+    b = vec_mergel (A2, E2); \
87
+    c = vec_mergeh (B2, F2); \
88
+    d = vec_mergel (B2, F2); \
89
+    e = vec_mergeh (C2, G2); \
90
+    f = vec_mergel (C2, G2); \
91
+    g = vec_mergeh (D2, H2); \
92
+    h = vec_mergel (D2, H2); \
93
+} while (0)
94
+
95
+
96
+/** @brief loads unaligned vector @a *src with offset @a offset
97
+    and returns it */
98
+static inline vector unsigned char unaligned_load(int offset, uint8_t *src)
99
+{
100
+    register vector unsigned char first = vec_ld(offset, src);
101
+    register vector unsigned char second = vec_ld(offset+15, src);
102
+    register vector unsigned char mask = vec_lvsl(offset, src);
103
+    return vec_perm(first, second, mask);
104
+}
105
+
106
+/**
107
+ * loads a vector with known misalignment
108
+ * @param perm_vec the align permute vector to combine the two loads from lvsl
109
+ */
110
+static inline vec_u8 load_with_perm_vec(int offset, uint8_t *src, vec_u8 perm_vec)
111
+{
112
+    vec_u8 a = vec_ld(offset, src);
113
+    vec_u8 b = vec_ld(offset+15, src);
114
+    return vec_perm(a, b, perm_vec);
115
+}
116
+
117
+#endif /* AVUTIL_PPC_UTIL_ALTIVEC_H */
... ...
@@ -1 +1,4 @@
1 1
 OBJS += x86/cpu.o                                                       \
2
+        x86/float_dsp_init.o                                            \
3
+
4
+YASM-OBJS += x86/float_dsp.o                                            \
2 5
new file mode 100644
... ...
@@ -0,0 +1,55 @@
0
+;*****************************************************************************
1
+;* x86-optimized Float DSP functions
2
+;*
3
+;* This file is part of Libav.
4
+;*
5
+;* Libav is free software; you can redistribute it and/or
6
+;* modify it under the terms of the GNU Lesser General Public
7
+;* License as published by the Free Software Foundation; either
8
+;* version 2.1 of the License, or (at your option) any later version.
9
+;*
10
+;* Libav is distributed in the hope that it will be useful,
11
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+;* Lesser General Public License for more details.
14
+;*
15
+;* You should have received a copy of the GNU Lesser General Public
16
+;* License along with Libav; if not, write to the Free Software
17
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+;******************************************************************************
19
+
20
+%include "x86inc.asm"
21
+
22
+SECTION .text
23
+
24
+;-----------------------------------------------------------------------------
25
+; void vector_fmul(float *dst, const float *src0, const float *src1, int len)
26
+;-----------------------------------------------------------------------------
27
+%macro VECTOR_FMUL 0
28
+cglobal vector_fmul, 4,4,2, dst, src0, src1, len ; 4 arguments, 4 GPRs, 2 vector registers (m0, m1)
29
+    lea       lenq, [lend*4 - 2*mmsize] ; len floats * 4 bytes, minus one chunk: byte offset of the last 2*mmsize-byte chunk
30
+ALIGN 16
31
+.loop
32
+    mova      m0,   [src0q + lenq] ; load two registers' worth of src0 (mova is x86inc's aligned move; buffers presumably mmsize-aligned - TODO confirm)
33
+    mova      m1,   [src0q + lenq + mmsize]
34
+    mulps     m0, m0, [src1q + lenq] ; element-wise multiply by the matching src1 floats
35
+    mulps     m1, m1, [src1q + lenq + mmsize]
36
+    mova      [dstq + lenq], m0 ; store both products to dst at the same offset
37
+    mova      [dstq + lenq + mmsize], m1
38
+
39
+    sub       lenq, 2*mmsize ; walk backwards, one 2*mmsize-byte chunk per iteration
40
+    jge       .loop ; continue while the offset is still >= 0; assumes len is a positive multiple of 2*mmsize/4 floats - TODO confirm with callers
41
+%if mmsize == 32
42
+    vzeroupper ; 32-byte (YMM) build only: clear the upper register halves before returning
43
+    RET
44
+%else
45
+    REP_RET ; NOTE(review): x86inc return macro meant for a ret that directly follows a branch - confirm
46
+%endif
47
+%endmacro
48
+
49
+INIT_XMM sse ; select XMM registers and the "sse" suffix for the next expansion
49
+VECTOR_FMUL ; presumably the symbol declared in C as ff_vector_fmul_sse
50
+%if HAVE_AVX
51
+INIT_YMM avx ; select YMM registers (the mmsize == 32 path of the macro)
52
+VECTOR_FMUL ; presumably the symbol declared in C as ff_vector_fmul_avx
53
+%endif
0 55
new file mode 100644
... ...
@@ -0,0 +1,41 @@
0
+/*
1
+ * This file is part of FFmpeg.
2
+ *
3
+ * FFmpeg is free software; you can redistribute it and/or
4
+ * modify it under the terms of the GNU Lesser General Public
5
+ * License as published by the Free Software Foundation; either
6
+ * version 2.1 of the License, or (at your option) any later version.
7
+ *
8
+ * FFmpeg is distributed in the hope that it will be useful,
9
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
+ * Lesser General Public License for more details.
12
+ *
13
+ * You should have received a copy of the GNU Lesser General Public
14
+ * License along with FFmpeg; if not, write to the Free Software
15
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+ */
17
+
18
+#include "config.h"
19
+
20
+#include "libavutil/cpu.h"
21
+#include "libavutil/float_dsp.h"
22
+
23
+extern void ff_vector_fmul_sse(float *dst, const float *src0, const float *src1,
23
+                               int len); /* SSE kernel; defined outside this file (assembly object listed in YASM-OBJS) */
24
+extern void ff_vector_fmul_avx(float *dst, const float *src0, const float *src1,
25
+                               int len); /* AVX kernel; only referenced below when HAVE_AVX is set */
27
+
28
+void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp) /* installs the x86 SIMD vector_fmul into fdsp where supported */
29
+{
30
+#if HAVE_YASM /* the kernels are assembly objects, so without yasm fdsp is left untouched */
31
+    int mm_flags = av_get_cpu_flags(); /* CPU capability flags queried at run time */
32
+
33
+    if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) { /* '&' binds tighter than '&&': runtime flag AND build-time support */
34
+        fdsp->vector_fmul = ff_vector_fmul_sse;
35
+    }
36
+    if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) { /* checked after SSE, so AVX overrides it when both are available */
37
+        fdsp->vector_fmul = ff_vector_fmul_avx;
38
+    }
39
+#endif
40
+}