2 | 6 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,49 @@ |
0 |
+/* |
|
1 |
+ * This file is part of Libav. |
|
2 |
+ * |
|
3 |
+ * Libav is free software; you can redistribute it and/or |
|
4 |
+ * modify it under the terms of the GNU Lesser General Public |
|
5 |
+ * License as published by the Free Software Foundation; either |
|
6 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
7 |
+ * |
|
8 |
+ * Libav is distributed in the hope that it will be useful, |
|
9 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
11 |
+ * Lesser General Public License for more details. |
|
12 |
+ * |
|
13 |
+ * You should have received a copy of the GNU Lesser General Public |
|
14 |
+ * License along with Libav; if not, write to the Free Software |
|
15 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
16 |
+ */ |
|
17 |
+ |
|
18 |
+#include <stdint.h> |
|
19 |
+ |
|
20 |
+#include "config.h" |
|
21 |
+#include "libavutil/attributes.h" |
|
22 |
+#include "libavutil/cpu.h" |
|
23 |
+#include "libavutil/aarch64/cpu.h" |
|
24 |
+#include "libavutil/samplefmt.h" |
|
25 |
+#include "libavresample/audio_convert.h" |
|
26 |
+ |
|
27 |
+void ff_conv_flt_to_s16_neon(int16_t *dst, const float *src, int len); |
|
28 |
+void ff_conv_fltp_to_s16_neon(int16_t *dst, float *const *src, |
|
29 |
+ int len, int channels); |
|
30 |
+void ff_conv_fltp_to_s16_2ch_neon(int16_t *dst, float *const *src, |
|
31 |
+ int len, int channels); |
|
32 |
+ |
|
33 |
+av_cold void ff_audio_convert_init_aarch64(AudioConvert *ac) |
|
34 |
+{ |
|
35 |
+ int cpu_flags = av_get_cpu_flags(); |
|
36 |
+ |
|
37 |
+ if (have_neon(cpu_flags)) { |
|
38 |
+ ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLT, |
|
39 |
+ 0, 16, 8, "NEON", |
|
40 |
+ ff_conv_flt_to_s16_neon); |
|
41 |
+ ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP, |
|
42 |
+ 2, 16, 8, "NEON", |
|
43 |
+ ff_conv_fltp_to_s16_2ch_neon); |
|
44 |
+ ff_audio_convert_set_func(ac, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLTP, |
|
45 |
+ 0, 16, 8, "NEON", |
|
46 |
+ ff_conv_fltp_to_s16_neon); |
|
47 |
+ } |
|
48 |
+} |
0 | 49 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,363 @@ |
0 |
+/* |
|
1 |
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> |
|
2 |
+ * Copyright (c) 2014 Janne Grunau <janne-libav@jannau.net> |
|
3 |
+ * |
|
4 |
+ * This file is part of Libav. |
|
5 |
+ * |
|
6 |
+ * Libav is free software; you can redistribute it and/or |
|
7 |
+ * modify it under the terms of the GNU Lesser General Public |
|
8 |
+ * License as published by the Free Software Foundation; either |
|
9 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
10 |
+ * |
|
11 |
+ * Libav is distributed in the hope that it will be useful, |
|
12 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
13 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
14 |
+ * Lesser General Public License for more details. |
|
15 |
+ * |
|
16 |
+ * You should have received a copy of the GNU Lesser General Public |
|
17 |
+ * License along with Libav; if not, write to the Free Software |
|
18 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
19 |
+ */ |
|
20 |
+ |
|
21 |
+#include "config.h" |
|
22 |
+#include "libavutil/aarch64/asm.S" |
|
23 |
+ |
|
24 |
// void ff_conv_flt_to_s16_neon(int16_t *dst, const float *src, int len)
//
// Convert packed float samples to signed 16-bit samples.
//   x0 = dst, x1 = src, w2 = len (sample count)
//
// Each float is converted to 32-bit fixed point with 31 fractional bits
// (fcvtzs #31, rounds toward zero) and then narrowed to 16 bits with
// rounding and saturation (sqrshrn/sqrshrn2 #16).
//
// The first 8 samples are always loaded and 8 are always stored, and the
// remaining work is done in groups of 8/16, so len is assumed to be a
// non-zero multiple of 8 (NOTE(review): confirm the caller guarantees this).
//
// len is a 32-bit int: only w2 is defined on entry, the upper half of x2 is
// unspecified by the procedure call standard. The initial subtraction is
// therefore done on w2, which also zero-extends the result into x2 for the
// 64-bit mask operations that follow.
function ff_conv_flt_to_s16_neon, export=1
        subs            w2,  w2,  #8            // flags: Z set when len == 8
        ld1             {v0.4s}, [x1], #16
        fcvtzs          v4.4s, v0.4s, #31
        ld1             {v1.4s}, [x1], #16
        fcvtzs          v5.4s, v1.4s, #31
        b.eq            3f                      // exactly 8 samples
        ands            x12, x2,  #~15          // samples handled by the 16-wide loop
        b.eq            2f
        // Main loop: narrow/store the 8 pending samples, convert and store 8
        // more, and pre-convert the following 8 into v4/v5.
1:      subs            x12, x12, #16
        sqrshrn         v4.4h, v4.4s, #16
        ld1             {v2.4s}, [x1], #16
        fcvtzs          v6.4s, v2.4s, #31
        sqrshrn2        v4.8h, v5.4s, #16
        ld1             {v3.4s}, [x1], #16
        fcvtzs          v7.4s, v3.4s, #31
        sqrshrn         v6.4h, v6.4s, #16
        st1             {v4.8h}, [x0], #16
        sqrshrn2        v6.8h, v7.4s, #16
        ld1             {v0.4s}, [x1], #16
        fcvtzs          v4.4s, v0.4s, #31
        ld1             {v1.4s}, [x1], #16
        fcvtzs          v5.4s, v1.4s, #31
        st1             {v6.8h}, [x0], #16
        b.ne            1b                      // flags still from subs x12
        ands            x2,  x2,  #15
        b.eq            3f                      // only the pending 8 remain
        // Tail of 16: the pending 8 in v4/v5 plus 8 freshly loaded samples.
2:      ld1             {v2.4s}, [x1], #16
        sqrshrn         v4.4h, v4.4s, #16
        fcvtzs          v6.4s, v2.4s, #31
        ld1             {v3.4s}, [x1], #16
        sqrshrn2        v4.8h, v5.4s, #16
        fcvtzs          v7.4s, v3.4s, #31
        sqrshrn         v6.4h, v6.4s, #16
        st1             {v4.8h}, [x0], #16
        sqrshrn2        v6.8h, v7.4s, #16
        st1             {v6.8h}, [x0]
        ret
        // Tail of 8: narrow and store the pending samples in v4/v5.
3:      sqrshrn         v4.4h, v4.4s, #16
        sqrshrn2        v4.8h, v5.4s, #16
        st1             {v4.8h}, [x0]
        ret
endfunc
|
67 |
+ |
|
68 |
// void ff_conv_fltp_to_s16_2ch_neon(int16_t *dst, float *const *src,
//                                   int len, int channels)
//
// Convert two planar float channels to interleaved signed 16-bit samples.
//   x0 = dst, x1 = src (array of plane pointers; src[0] and src[1] are read),
//   w2 = len (samples per channel), w3 = channels (not read by this routine)
//
// Each float is converted to 32-bit fixed point with 31 fractional bits
// (fcvtzs #31). "sri Vd, Vn, #16" then inserts the upper 16 bits of the
// first-plane value into the lower half of the second-plane lane, so every
// 32-bit lane holds one {src[0], src[1]} sample pair. The narrowing is a
// plain truncation to the top 16 bits (no rounding step).
//
// 8 samples per channel are always loaded and stored and the rest is done
// in groups of 8/16, so len is assumed to be a non-zero multiple of 8
// (NOTE(review): confirm the caller guarantees this).
//
// len is a 32-bit int: only w2 is defined on entry, the upper half of x2 is
// unspecified by the procedure call standard. The initial subtraction is
// therefore done on w2, which also zero-extends the result into x2 for the
// 64-bit mask operations that follow.
function ff_conv_fltp_to_s16_2ch_neon, export=1
        ldp             x4,  x5,  [x1]          // x4 = src[0], x5 = src[1]
        subs            w2,  w2,  #8            // flags: Z set when len == 8
        ld1             {v0.4s}, [x4], #16
        fcvtzs          v4.4s, v0.4s, #31
        ld1             {v1.4s}, [x4], #16
        fcvtzs          v5.4s, v1.4s, #31
        ld1             {v2.4s}, [x5], #16
        fcvtzs          v6.4s, v2.4s, #31
        ld1             {v3.4s}, [x5], #16
        fcvtzs          v7.4s, v3.4s, #31
        b.eq            3f                      // exactly 8 samples per channel
        ands            x12, x2,  #~15          // samples handled by the 16-wide loop
        b.eq            2f
        // Main loop: store the 8 pending pairs (v4-v7), convert and store 8
        // more (v20-v23), and pre-convert the following 8 into v4-v7.
1:      subs            x12, x12, #16
        ld1             {v16.4s}, [x4], #16
        fcvtzs          v20.4s, v16.4s, #31
        sri             v6.4s, v4.4s, #16
        ld1             {v17.4s}, [x4], #16
        fcvtzs          v21.4s, v17.4s, #31
        ld1             {v18.4s}, [x5], #16
        fcvtzs          v22.4s, v18.4s, #31
        ld1             {v19.4s}, [x5], #16
        sri             v7.4s, v5.4s, #16
        st1             {v6.4s}, [x0], #16
        fcvtzs          v23.4s, v19.4s, #31
        st1             {v7.4s}, [x0], #16
        sri             v22.4s, v20.4s, #16
        ld1             {v0.4s}, [x4], #16
        sri             v23.4s, v21.4s, #16
        st1             {v22.4s}, [x0], #16
        fcvtzs          v4.4s, v0.4s, #31
        ld1             {v1.4s}, [x4], #16
        fcvtzs          v5.4s, v1.4s, #31
        ld1             {v2.4s}, [x5], #16
        fcvtzs          v6.4s, v2.4s, #31
        ld1             {v3.4s}, [x5], #16
        fcvtzs          v7.4s, v3.4s, #31
        st1             {v23.4s}, [x0], #16
        b.ne            1b                      // flags still from subs x12
        ands            x2,  x2,  #15
        b.eq            3f                      // only the pending 8 remain
        // Tail of 16: the pending 8 pairs plus 8 freshly loaded ones.
2:      sri             v6.4s, v4.4s, #16
        ld1             {v0.4s}, [x4], #16
        fcvtzs          v0.4s, v0.4s, #31
        ld1             {v1.4s}, [x4], #16
        fcvtzs          v1.4s, v1.4s, #31
        ld1             {v2.4s}, [x5], #16
        fcvtzs          v2.4s, v2.4s, #31
        sri             v7.4s, v5.4s, #16
        ld1             {v3.4s}, [x5], #16
        fcvtzs          v3.4s, v3.4s, #31
        sri             v2.4s, v0.4s, #16
        st1             {v6.4s,v7.4s}, [x0], #32
        sri             v3.4s, v1.4s, #16
        st1             {v2.4s,v3.4s}, [x0], #32
        ret
        // Tail of 8: interleave and store the pending pairs in v4-v7.
3:      sri             v6.4s, v4.4s, #16
        sri             v7.4s, v5.4s, #16
        st1             {v6.4s,v7.4s}, [x0]
        ret
endfunc
|
130 |
+ |
|
131 |
// void ff_conv_fltp_to_s16_neon(int16_t *dst, float *const *src,
//                               int len, int channels)
//
// Convert planar float channels to interleaved signed 16-bit samples for an
// arbitrary channel count.
//   x0 = dst, x1 = src (array of plane pointers),
//   w2 = len (samples per channel), w3 = channels
//
// Dispatch:
//   channels == 2 : tail-call ff_conv_fltp_to_s16_2ch_neon
//   channels <  2 : load the single plane pointer and tail-call
//                   ff_conv_flt_to_s16_neon
//   channels >  2 : handled here in passes of 4 channels, then 2, then 1.
//                   After each pass x1 has advanced past the planes consumed,
//                   dst (x0) is advanced by the bytes those channels occupy
//                   in one frame, and w3 is reduced accordingly.
//
// x12 holds the output stride in bytes between consecutive frames
// (channels * 2) and is used as the post-increment of every store.
// Samples are converted with fcvtzs #31 and truncated to their upper
// 16 bits (via sri #16 or by storing the odd .h lanes); there is no
// rounding step in this routine.
//
// Every pass starts by loading 8 samples per plane and proceeds in groups
// of 8/16, so len is assumed to be a non-zero multiple of 8
// (NOTE(review): confirm the caller guarantees this).
//
// channels is a 32-bit int: only w3 is defined on entry, the upper half of
// x3 is unspecified by the procedure call standard. The stride is therefore
// computed from w3; writing w12 zero-extends into x12.
function ff_conv_fltp_to_s16_neon, export=1
        cmp             w3,  #2
        b.eq            X(ff_conv_fltp_to_s16_2ch_neon)
        b.gt            1f
        ldr             x1,  [x1]               // single plane: use the packed converter
        b               X(ff_conv_flt_to_s16_neon)
1:
        cmp             w3,  #4
        lsl             w12, w3,  #1            // x12 = channels * sizeof(int16_t)
        b.lt            4f

5:      // 4 channels
        ldp             x4,  x5,  [x1], #16     // next four plane pointers
        ldp             x6,  x7,  [x1], #16
        mov             w9,  w2                 // w9 = samples left in this pass
        mov             x8,  x0                 // x8 = running output pointer
        ld1             {v4.4s}, [x4], #16
        fcvtzs          v4.4s, v4.4s, #31
        ld1             {v5.4s}, [x5], #16
        fcvtzs          v5.4s, v5.4s, #31
        ld1             {v6.4s}, [x6], #16
        fcvtzs          v6.4s, v6.4s, #31
        ld1             {v7.4s}, [x7], #16
        fcvtzs          v7.4s, v7.4s, #31
6:
        // Per iteration: 8 frames. sri pairs planes 0/1 and 2/3 into 32-bit
        // lanes, zip1/zip2 combine the pairs into 64-bit 4-channel frames.
        subs            w9,  w9,  #8
        ld1             {v0.4s}, [x4], #16
        fcvtzs          v0.4s, v0.4s, #31
        sri             v5.4s, v4.4s, #16
        ld1             {v1.4s}, [x5], #16
        fcvtzs          v1.4s, v1.4s, #31
        sri             v7.4s, v6.4s, #16
        ld1             {v2.4s}, [x6], #16
        fcvtzs          v2.4s, v2.4s, #31
        zip1            v16.4s, v5.4s, v7.4s
        ld1             {v3.4s}, [x7], #16
        fcvtzs          v3.4s, v3.4s, #31
        zip2            v17.4s, v5.4s, v7.4s
        st1             {v16.d}[0], [x8], x12
        sri             v1.4s, v0.4s, #16
        st1             {v16.d}[1], [x8], x12
        sri             v3.4s, v2.4s, #16
        st1             {v17.d}[0], [x8], x12
        zip1            v18.4s, v1.4s, v3.4s
        st1             {v17.d}[1], [x8], x12
        zip2            v19.4s, v1.4s, v3.4s
        b.eq            7f                      // flags still from subs w9
        ld1             {v4.4s}, [x4], #16
        fcvtzs          v4.4s, v4.4s, #31
        st1             {v18.d}[0], [x8], x12
        ld1             {v5.4s}, [x5], #16
        fcvtzs          v5.4s, v5.4s, #31
        st1             {v18.d}[1], [x8], x12
        ld1             {v6.4s}, [x6], #16
        fcvtzs          v6.4s, v6.4s, #31
        st1             {v19.d}[0], [x8], x12
        ld1             {v7.4s}, [x7], #16
        fcvtzs          v7.4s, v7.4s, #31
        st1             {v19.d}[1], [x8], x12
        b               6b
7:
        // Last 4 frames of this pass, without preloading further input.
        st1             {v18.d}[0], [x8], x12
        st1             {v18.d}[1], [x8], x12
        st1             {v19.d}[0], [x8], x12
        st1             {v19.d}[1], [x8], x12
        subs            w3,  w3,  #4
        b.eq            9f
        cmp             w3,  #4
        add             x0,  x0,  #8            // skip the 4 channels just written
        b.ge            5b

4:      // 2 channels
        cmp             w3,  #2
        b.lt            4f
        ldp             x4,  x5,  [x1], #16     // next two plane pointers
        mov             w9,  w2
        mov             x8,  x0
        tst             w9,  #8                 // odd number of 8-sample groups?
        ld1             {v4.4s}, [x4], #16
        fcvtzs          v4.4s, v4.4s, #31
        ld1             {v5.4s}, [x5], #16
        fcvtzs          v5.4s, v5.4s, #31
        ld1             {v6.4s}, [x4], #16
        fcvtzs          v6.4s, v6.4s, #31
        ld1             {v7.4s}, [x5], #16
        fcvtzs          v7.4s, v7.4s, #31
        b.eq            6f                      // flags still from tst
        subs            w9,  w9,  #8
        b.eq            7f                      // exactly 8 samples
        // Peel one group of 8 so the loop below can work in steps of 16.
        sri             v5.4s, v4.4s, #16
        ld1             {v4.4s}, [x4], #16
        fcvtzs          v4.4s, v4.4s, #31
        st1             {v5.s}[0], [x8], x12
        sri             v7.4s, v6.4s, #16
        st1             {v5.s}[1], [x8], x12
        ld1             {v6.4s}, [x4], #16
        fcvtzs          v6.4s, v6.4s, #31
        st1             {v5.s}[2], [x8], x12
        st1             {v5.s}[3], [x8], x12
        st1             {v7.s}[0], [x8], x12
        st1             {v7.s}[1], [x8], x12
        ld1             {v5.4s}, [x5], #16
        fcvtzs          v5.4s, v5.4s, #31
        st1             {v7.s}[2], [x8], x12
        st1             {v7.s}[3], [x8], x12
        ld1             {v7.4s}, [x5], #16
        fcvtzs          v7.4s, v7.4s, #31
6:
        // Per iteration: 16 frames; each 32-bit lane is one 2-channel frame.
        subs            w9,  w9,  #16
        ld1             {v0.4s}, [x4], #16
        sri             v5.4s, v4.4s, #16
        fcvtzs          v0.4s, v0.4s, #31
        ld1             {v1.4s}, [x5], #16
        sri             v7.4s, v6.4s, #16
        st1             {v5.s}[0], [x8], x12
        st1             {v5.s}[1], [x8], x12
        fcvtzs          v1.4s, v1.4s, #31
        st1             {v5.s}[2], [x8], x12
        st1             {v5.s}[3], [x8], x12
        ld1             {v2.4s}, [x4], #16
        st1             {v7.s}[0], [x8], x12
        fcvtzs          v2.4s, v2.4s, #31
        st1             {v7.s}[1], [x8], x12
        ld1             {v3.4s}, [x5], #16
        st1             {v7.s}[2], [x8], x12
        fcvtzs          v3.4s, v3.4s, #31
        st1             {v7.s}[3], [x8], x12
        sri             v1.4s, v0.4s, #16
        sri             v3.4s, v2.4s, #16
        b.eq            6f                      // flags still from subs w9
        ld1             {v4.4s}, [x4], #16
        st1             {v1.s}[0], [x8], x12
        fcvtzs          v4.4s, v4.4s, #31
        st1             {v1.s}[1], [x8], x12
        ld1             {v5.4s}, [x5], #16
        st1             {v1.s}[2], [x8], x12
        fcvtzs          v5.4s, v5.4s, #31
        st1             {v1.s}[3], [x8], x12
        ld1             {v6.4s}, [x4], #16
        st1             {v3.s}[0], [x8], x12
        fcvtzs          v6.4s, v6.4s, #31
        st1             {v3.s}[1], [x8], x12
        ld1             {v7.4s}, [x5], #16
        st1             {v3.s}[2], [x8], x12
        fcvtzs          v7.4s, v7.4s, #31
        st1             {v3.s}[3], [x8], x12
        b.gt            6b
6:
        // Last 8 frames of the 16-wide loop (already paired in v1/v3).
        st1             {v1.s}[0], [x8], x12
        st1             {v1.s}[1], [x8], x12
        st1             {v1.s}[2], [x8], x12
        st1             {v1.s}[3], [x8], x12
        st1             {v3.s}[0], [x8], x12
        st1             {v3.s}[1], [x8], x12
        st1             {v3.s}[2], [x8], x12
        st1             {v3.s}[3], [x8], x12
        b               8f
7:
        // len == 8: pair and store the only 8 frames.
        sri             v5.4s, v4.4s, #16
        sri             v7.4s, v6.4s, #16
        st1             {v5.s}[0], [x8], x12
        st1             {v5.s}[1], [x8], x12
        st1             {v5.s}[2], [x8], x12
        st1             {v5.s}[3], [x8], x12
        st1             {v7.s}[0], [x8], x12
        st1             {v7.s}[1], [x8], x12
        st1             {v7.s}[2], [x8], x12
        st1             {v7.s}[3], [x8], x12
8:
        subs            w3,  w3,  #2
        add             x0,  x0,  #4            // skip the 2 channels just written
        b.eq            9f

4:      // 1 channel
        ldr             x4,  [x1]               // last remaining plane pointer
        tst             w2,  #8                 // odd number of 8-sample groups?
        mov             w9,  w2
        mov             x5,  x0
        ld1             {v0.4s}, [x4], #16
        fcvtzs          v0.4s, v0.4s, #31
        ld1             {v1.4s}, [x4], #16
        fcvtzs          v1.4s, v1.4s, #31
        b.ne            8f                      // flags still from tst
6:
        // Per iteration: 16 samples. The odd .h lanes are the upper 16 bits
        // of each converted 32-bit value.
        subs            w9,  w9,  #16
        ld1             {v2.4s}, [x4], #16
        fcvtzs          v2.4s, v2.4s, #31
        ld1             {v3.4s}, [x4], #16
        fcvtzs          v3.4s, v3.4s, #31
        st1             {v0.h}[1], [x5], x12
        st1             {v0.h}[3], [x5], x12
        st1             {v0.h}[5], [x5], x12
        st1             {v0.h}[7], [x5], x12
        st1             {v1.h}[1], [x5], x12
        st1             {v1.h}[3], [x5], x12
        st1             {v1.h}[5], [x5], x12
        st1             {v1.h}[7], [x5], x12
        b.eq            7f                      // flags still from subs w9
        ld1             {v0.4s}, [x4], #16
        fcvtzs          v0.4s, v0.4s, #31
        ld1             {v1.4s}, [x4], #16
        fcvtzs          v1.4s, v1.4s, #31
7:
        st1             {v2.h}[1], [x5], x12
        st1             {v2.h}[3], [x5], x12
        st1             {v2.h}[5], [x5], x12
        st1             {v2.h}[7], [x5], x12
        st1             {v3.h}[1], [x5], x12
        st1             {v3.h}[3], [x5], x12
        st1             {v3.h}[5], [x5], x12
        st1             {v3.h}[7], [x5], x12
        b.gt            6b
        ret
8:
        // Peel one group of 8 so the loop above can work in steps of 16.
        subs            w9,  w9,  #8
        st1             {v0.h}[1], [x5], x12
        st1             {v0.h}[3], [x5], x12
        st1             {v0.h}[5], [x5], x12
        st1             {v0.h}[7], [x5], x12
        st1             {v1.h}[1], [x5], x12
        st1             {v1.h}[3], [x5], x12
        st1             {v1.h}[5], [x5], x12
        st1             {v1.h}[7], [x5], x12
        b.eq            9f
        ld1             {v0.4s}, [x4], #16
        fcvtzs          v0.4s, v0.4s, #31
        ld1             {v1.4s}, [x4], #16
        fcvtzs          v1.4s, v1.4s, #31
        b               6b
        // Common exit; a numeric local label keeps a generic name such as
        // "end" out of the file's symbol namespace.
9:
        ret
endfunc
... | ... |
@@ -96,6 +96,7 @@ int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in); |
96 | 96 |
|
97 | 97 |
/* arch-specific initialization functions */ |
98 | 98 |
|
99 |
+void ff_audio_convert_init_aarch64(AudioConvert *ac); |
|
99 | 100 |
void ff_audio_convert_init_arm(AudioConvert *ac); |
100 | 101 |
void ff_audio_convert_init_x86(AudioConvert *ac); |
101 | 102 |
|