Browse code

dct/fft: Give consistent names to fixed/float template files

Diego Biurrun authored on 2013/11/20 05:17:53
Showing 12 changed files
1 1
deleted file mode 100644
... ...
@@ -1,276 +0,0 @@
1
-/*
2
- * Template for the Discrete Cosine Transform for 32 samples
3
- * Copyright (c) 2001, 2002 Fabrice Bellard
4
- *
5
- * This file is part of Libav.
6
- *
7
- * Libav is free software; you can redistribute it and/or
8
- * modify it under the terms of the GNU Lesser General Public
9
- * License as published by the Free Software Foundation; either
10
- * version 2.1 of the License, or (at your option) any later version.
11
- *
12
- * Libav is distributed in the hope that it will be useful,
13
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
- * Lesser General Public License for more details.
16
- *
17
- * You should have received a copy of the GNU Lesser General Public
18
- * License along with Libav; if not, write to the Free Software
19
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
- */
21
-
22
-#include "dct32.h"
23
-#include "mathops.h"
24
-
25
-#if DCT32_FLOAT
26
-#   define dct32 ff_dct32_float
27
-#   define FIXHR(x)       ((float)(x))
28
-#   define MULH3(x, y, s) ((s)*(y)*(x))
29
-#   define INTFLOAT float
30
-#else
31
-#   define dct32 ff_dct32_fixed
32
-#   define FIXHR(a)       ((int)((a) * (1LL<<32) + 0.5))
33
-#   define MULH3(x, y, s) MULH((s)*(x), y)
34
-#   define INTFLOAT int
35
-#endif
36
-
37
-
38
-/* tab[i][j] = 1.0 / (2.0 * cos(pi*(2*k+1) / 2^(6 - j))) */
39
-
40
-/* cos(i*pi/64) */
41
-
42
-#define COS0_0  FIXHR(0.50060299823519630134/2)
43
-#define COS0_1  FIXHR(0.50547095989754365998/2)
44
-#define COS0_2  FIXHR(0.51544730992262454697/2)
45
-#define COS0_3  FIXHR(0.53104259108978417447/2)
46
-#define COS0_4  FIXHR(0.55310389603444452782/2)
47
-#define COS0_5  FIXHR(0.58293496820613387367/2)
48
-#define COS0_6  FIXHR(0.62250412303566481615/2)
49
-#define COS0_7  FIXHR(0.67480834145500574602/2)
50
-#define COS0_8  FIXHR(0.74453627100229844977/2)
51
-#define COS0_9  FIXHR(0.83934964541552703873/2)
52
-#define COS0_10 FIXHR(0.97256823786196069369/2)
53
-#define COS0_11 FIXHR(1.16943993343288495515/4)
54
-#define COS0_12 FIXHR(1.48416461631416627724/4)
55
-#define COS0_13 FIXHR(2.05778100995341155085/8)
56
-#define COS0_14 FIXHR(3.40760841846871878570/8)
57
-#define COS0_15 FIXHR(10.19000812354805681150/32)
58
-
59
-#define COS1_0 FIXHR(0.50241928618815570551/2)
60
-#define COS1_1 FIXHR(0.52249861493968888062/2)
61
-#define COS1_2 FIXHR(0.56694403481635770368/2)
62
-#define COS1_3 FIXHR(0.64682178335999012954/2)
63
-#define COS1_4 FIXHR(0.78815462345125022473/2)
64
-#define COS1_5 FIXHR(1.06067768599034747134/4)
65
-#define COS1_6 FIXHR(1.72244709823833392782/4)
66
-#define COS1_7 FIXHR(5.10114861868916385802/16)
67
-
68
-#define COS2_0 FIXHR(0.50979557910415916894/2)
69
-#define COS2_1 FIXHR(0.60134488693504528054/2)
70
-#define COS2_2 FIXHR(0.89997622313641570463/2)
71
-#define COS2_3 FIXHR(2.56291544774150617881/8)
72
-
73
-#define COS3_0 FIXHR(0.54119610014619698439/2)
74
-#define COS3_1 FIXHR(1.30656296487637652785/4)
75
-
76
-#define COS4_0 FIXHR(0.70710678118654752439/2)
77
-
78
-/* butterfly operator */
79
-#define BF(a, b, c, s)\
80
-{\
81
-    tmp0 = val##a + val##b;\
82
-    tmp1 = val##a - val##b;\
83
-    val##a = tmp0;\
84
-    val##b = MULH3(tmp1, c, 1<<(s));\
85
-}
86
-
87
-#define BF0(a, b, c, s)\
88
-{\
89
-    tmp0 = tab[a] + tab[b];\
90
-    tmp1 = tab[a] - tab[b];\
91
-    val##a = tmp0;\
92
-    val##b = MULH3(tmp1, c, 1<<(s));\
93
-}
94
-
95
-#define BF1(a, b, c, d)\
96
-{\
97
-    BF(a, b, COS4_0, 1);\
98
-    BF(c, d,-COS4_0, 1);\
99
-    val##c += val##d;\
100
-}
101
-
102
-#define BF2(a, b, c, d)\
103
-{\
104
-    BF(a, b, COS4_0, 1);\
105
-    BF(c, d,-COS4_0, 1);\
106
-    val##c += val##d;\
107
-    val##a += val##c;\
108
-    val##c += val##b;\
109
-    val##b += val##d;\
110
-}
111
-
112
-#define ADD(a, b) val##a += val##b
113
-
114
-/* DCT32 without 1/sqrt(2) coef zero scaling. */
115
-void dct32(INTFLOAT *out, const INTFLOAT *tab)
116
-{
117
-    INTFLOAT tmp0, tmp1;
118
-
119
-    INTFLOAT val0 , val1 , val2 , val3 , val4 , val5 , val6 , val7 ,
120
-             val8 , val9 , val10, val11, val12, val13, val14, val15,
121
-             val16, val17, val18, val19, val20, val21, val22, val23,
122
-             val24, val25, val26, val27, val28, val29, val30, val31;
123
-
124
-    /* pass 1 */
125
-    BF0( 0, 31, COS0_0 , 1);
126
-    BF0(15, 16, COS0_15, 5);
127
-    /* pass 2 */
128
-    BF( 0, 15, COS1_0 , 1);
129
-    BF(16, 31,-COS1_0 , 1);
130
-    /* pass 1 */
131
-    BF0( 7, 24, COS0_7 , 1);
132
-    BF0( 8, 23, COS0_8 , 1);
133
-    /* pass 2 */
134
-    BF( 7,  8, COS1_7 , 4);
135
-    BF(23, 24,-COS1_7 , 4);
136
-    /* pass 3 */
137
-    BF( 0,  7, COS2_0 , 1);
138
-    BF( 8, 15,-COS2_0 , 1);
139
-    BF(16, 23, COS2_0 , 1);
140
-    BF(24, 31,-COS2_0 , 1);
141
-    /* pass 1 */
142
-    BF0( 3, 28, COS0_3 , 1);
143
-    BF0(12, 19, COS0_12, 2);
144
-    /* pass 2 */
145
-    BF( 3, 12, COS1_3 , 1);
146
-    BF(19, 28,-COS1_3 , 1);
147
-    /* pass 1 */
148
-    BF0( 4, 27, COS0_4 , 1);
149
-    BF0(11, 20, COS0_11, 2);
150
-    /* pass 2 */
151
-    BF( 4, 11, COS1_4 , 1);
152
-    BF(20, 27,-COS1_4 , 1);
153
-    /* pass 3 */
154
-    BF( 3,  4, COS2_3 , 3);
155
-    BF(11, 12,-COS2_3 , 3);
156
-    BF(19, 20, COS2_3 , 3);
157
-    BF(27, 28,-COS2_3 , 3);
158
-    /* pass 4 */
159
-    BF( 0,  3, COS3_0 , 1);
160
-    BF( 4,  7,-COS3_0 , 1);
161
-    BF( 8, 11, COS3_0 , 1);
162
-    BF(12, 15,-COS3_0 , 1);
163
-    BF(16, 19, COS3_0 , 1);
164
-    BF(20, 23,-COS3_0 , 1);
165
-    BF(24, 27, COS3_0 , 1);
166
-    BF(28, 31,-COS3_0 , 1);
167
-
168
-
169
-
170
-    /* pass 1 */
171
-    BF0( 1, 30, COS0_1 , 1);
172
-    BF0(14, 17, COS0_14, 3);
173
-    /* pass 2 */
174
-    BF( 1, 14, COS1_1 , 1);
175
-    BF(17, 30,-COS1_1 , 1);
176
-    /* pass 1 */
177
-    BF0( 6, 25, COS0_6 , 1);
178
-    BF0( 9, 22, COS0_9 , 1);
179
-    /* pass 2 */
180
-    BF( 6,  9, COS1_6 , 2);
181
-    BF(22, 25,-COS1_6 , 2);
182
-    /* pass 3 */
183
-    BF( 1,  6, COS2_1 , 1);
184
-    BF( 9, 14,-COS2_1 , 1);
185
-    BF(17, 22, COS2_1 , 1);
186
-    BF(25, 30,-COS2_1 , 1);
187
-
188
-    /* pass 1 */
189
-    BF0( 2, 29, COS0_2 , 1);
190
-    BF0(13, 18, COS0_13, 3);
191
-    /* pass 2 */
192
-    BF( 2, 13, COS1_2 , 1);
193
-    BF(18, 29,-COS1_2 , 1);
194
-    /* pass 1 */
195
-    BF0( 5, 26, COS0_5 , 1);
196
-    BF0(10, 21, COS0_10, 1);
197
-    /* pass 2 */
198
-    BF( 5, 10, COS1_5 , 2);
199
-    BF(21, 26,-COS1_5 , 2);
200
-    /* pass 3 */
201
-    BF( 2,  5, COS2_2 , 1);
202
-    BF(10, 13,-COS2_2 , 1);
203
-    BF(18, 21, COS2_2 , 1);
204
-    BF(26, 29,-COS2_2 , 1);
205
-    /* pass 4 */
206
-    BF( 1,  2, COS3_1 , 2);
207
-    BF( 5,  6,-COS3_1 , 2);
208
-    BF( 9, 10, COS3_1 , 2);
209
-    BF(13, 14,-COS3_1 , 2);
210
-    BF(17, 18, COS3_1 , 2);
211
-    BF(21, 22,-COS3_1 , 2);
212
-    BF(25, 26, COS3_1 , 2);
213
-    BF(29, 30,-COS3_1 , 2);
214
-
215
-    /* pass 5 */
216
-    BF1( 0,  1,  2,  3);
217
-    BF2( 4,  5,  6,  7);
218
-    BF1( 8,  9, 10, 11);
219
-    BF2(12, 13, 14, 15);
220
-    BF1(16, 17, 18, 19);
221
-    BF2(20, 21, 22, 23);
222
-    BF1(24, 25, 26, 27);
223
-    BF2(28, 29, 30, 31);
224
-
225
-    /* pass 6 */
226
-
227
-    ADD( 8, 12);
228
-    ADD(12, 10);
229
-    ADD(10, 14);
230
-    ADD(14,  9);
231
-    ADD( 9, 13);
232
-    ADD(13, 11);
233
-    ADD(11, 15);
234
-
235
-    out[ 0] = val0;
236
-    out[16] = val1;
237
-    out[ 8] = val2;
238
-    out[24] = val3;
239
-    out[ 4] = val4;
240
-    out[20] = val5;
241
-    out[12] = val6;
242
-    out[28] = val7;
243
-    out[ 2] = val8;
244
-    out[18] = val9;
245
-    out[10] = val10;
246
-    out[26] = val11;
247
-    out[ 6] = val12;
248
-    out[22] = val13;
249
-    out[14] = val14;
250
-    out[30] = val15;
251
-
252
-    ADD(24, 28);
253
-    ADD(28, 26);
254
-    ADD(26, 30);
255
-    ADD(30, 25);
256
-    ADD(25, 29);
257
-    ADD(29, 27);
258
-    ADD(27, 31);
259
-
260
-    out[ 1] = val16 + val24;
261
-    out[17] = val17 + val25;
262
-    out[ 9] = val18 + val26;
263
-    out[25] = val19 + val27;
264
-    out[ 5] = val20 + val28;
265
-    out[21] = val21 + val29;
266
-    out[13] = val22 + val30;
267
-    out[29] = val23 + val31;
268
-    out[ 3] = val24 + val20;
269
-    out[19] = val25 + val21;
270
-    out[11] = val26 + val22;
271
-    out[27] = val27 + val23;
272
-    out[ 7] = val28 + val18;
273
-    out[23] = val29 + val19;
274
-    out[15] = val30 + val17;
275
-    out[31] = val31;
276
-}
... ...
@@ -17,4 +17,4 @@
17 17
  */
18 18
 
19 19
 #define DCT32_FLOAT 0
20
-#include "dct32.c"
20
+#include "dct32_template.c"
... ...
@@ -17,4 +17,4 @@
17 17
  */
18 18
 
19 19
 #define DCT32_FLOAT 1
20
-#include "dct32.c"
20
+#include "dct32_template.c"
21 21
new file mode 100644
... ...
@@ -0,0 +1,276 @@
0
+/*
1
+ * Template for the Discrete Cosine Transform for 32 samples
2
+ * Copyright (c) 2001, 2002 Fabrice Bellard
3
+ *
4
+ * This file is part of Libav.
5
+ *
6
+ * Libav is free software; you can redistribute it and/or
7
+ * modify it under the terms of the GNU Lesser General Public
8
+ * License as published by the Free Software Foundation; either
9
+ * version 2.1 of the License, or (at your option) any later version.
10
+ *
11
+ * Libav is distributed in the hope that it will be useful,
12
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
+ * Lesser General Public License for more details.
15
+ *
16
+ * You should have received a copy of the GNU Lesser General Public
17
+ * License along with Libav; if not, write to the Free Software
18
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
+ */
20
+
21
+#include "dct32.h"
22
+#include "mathops.h"
23
+
24
+#if DCT32_FLOAT
25
+#   define dct32 ff_dct32_float
26
+#   define FIXHR(x)       ((float)(x))
27
+#   define MULH3(x, y, s) ((s)*(y)*(x))
28
+#   define INTFLOAT float
29
+#else
30
+#   define dct32 ff_dct32_fixed
31
+#   define FIXHR(a)       ((int)((a) * (1LL<<32) + 0.5))
32
+#   define MULH3(x, y, s) MULH((s)*(x), y)
33
+#   define INTFLOAT int
34
+#endif
35
+
36
+
37
+/* tab[i][j] = 1.0 / (2.0 * cos(pi*(2*i+1) / 2^(6 - j))) */
38
+
39
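+/* COSj_i holds tab[i][j] (see above) divided by a power of two; the shift argument of BF()/BF0() multiplies that factor back in. */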
40
+
41
+#define COS0_0  FIXHR(0.50060299823519630134/2)
42
+#define COS0_1  FIXHR(0.50547095989754365998/2)
43
+#define COS0_2  FIXHR(0.51544730992262454697/2)
44
+#define COS0_3  FIXHR(0.53104259108978417447/2)
45
+#define COS0_4  FIXHR(0.55310389603444452782/2)
46
+#define COS0_5  FIXHR(0.58293496820613387367/2)
47
+#define COS0_6  FIXHR(0.62250412303566481615/2)
48
+#define COS0_7  FIXHR(0.67480834145500574602/2)
49
+#define COS0_8  FIXHR(0.74453627100229844977/2)
50
+#define COS0_9  FIXHR(0.83934964541552703873/2)
51
+#define COS0_10 FIXHR(0.97256823786196069369/2)
52
+#define COS0_11 FIXHR(1.16943993343288495515/4)
53
+#define COS0_12 FIXHR(1.48416461631416627724/4)
54
+#define COS0_13 FIXHR(2.05778100995341155085/8)
55
+#define COS0_14 FIXHR(3.40760841846871878570/8)
56
+#define COS0_15 FIXHR(10.19000812354805681150/32)
57
+
58
+#define COS1_0 FIXHR(0.50241928618815570551/2)
59
+#define COS1_1 FIXHR(0.52249861493968888062/2)
60
+#define COS1_2 FIXHR(0.56694403481635770368/2)
61
+#define COS1_3 FIXHR(0.64682178335999012954/2)
62
+#define COS1_4 FIXHR(0.78815462345125022473/2)
63
+#define COS1_5 FIXHR(1.06067768599034747134/4)
64
+#define COS1_6 FIXHR(1.72244709823833392782/4)
65
+#define COS1_7 FIXHR(5.10114861868916385802/16)
66
+
67
+#define COS2_0 FIXHR(0.50979557910415916894/2)
68
+#define COS2_1 FIXHR(0.60134488693504528054/2)
69
+#define COS2_2 FIXHR(0.89997622313641570463/2)
70
+#define COS2_3 FIXHR(2.56291544774150617881/8)
71
+
72
+#define COS3_0 FIXHR(0.54119610014619698439/2)
73
+#define COS3_1 FIXHR(1.30656296487637652785/4)
74
+
75
+#define COS4_0 FIXHR(0.70710678118654752439/2)
76
+
77
+/* butterfly operator */
78
+#define BF(a, b, c, s)\
79
+{\
80
+    tmp0 = val##a + val##b;\
81
+    tmp1 = val##a - val##b;\
82
+    val##a = tmp0;\
83
+    val##b = MULH3(tmp1, c, 1<<(s));\
84
+}
85
+
86
+#define BF0(a, b, c, s)\
87
+{\
88
+    tmp0 = tab[a] + tab[b];\
89
+    tmp1 = tab[a] - tab[b];\
90
+    val##a = tmp0;\
91
+    val##b = MULH3(tmp1, c, 1<<(s));\
92
+}
93
+
94
+#define BF1(a, b, c, d)\
95
+{\
96
+    BF(a, b, COS4_0, 1);\
97
+    BF(c, d,-COS4_0, 1);\
98
+    val##c += val##d;\
99
+}
100
+
101
+#define BF2(a, b, c, d)\
102
+{\
103
+    BF(a, b, COS4_0, 1);\
104
+    BF(c, d,-COS4_0, 1);\
105
+    val##c += val##d;\
106
+    val##a += val##c;\
107
+    val##c += val##b;\
108
+    val##b += val##d;\
109
+}
110
+
111
+#define ADD(a, b) val##a += val##b
112
+
113
+/* DCT32 without 1/sqrt(2) coef zero scaling. */
114
+void dct32(INTFLOAT *out, const INTFLOAT *tab)
115
+{
116
+    INTFLOAT tmp0, tmp1;
117
+
118
+    INTFLOAT val0 , val1 , val2 , val3 , val4 , val5 , val6 , val7 ,
119
+             val8 , val9 , val10, val11, val12, val13, val14, val15,
120
+             val16, val17, val18, val19, val20, val21, val22, val23,
121
+             val24, val25, val26, val27, val28, val29, val30, val31;
122
+
123
+    /* pass 1 */
124
+    BF0( 0, 31, COS0_0 , 1);
125
+    BF0(15, 16, COS0_15, 5);
126
+    /* pass 2 */
127
+    BF( 0, 15, COS1_0 , 1);
128
+    BF(16, 31,-COS1_0 , 1);
129
+    /* pass 1 */
130
+    BF0( 7, 24, COS0_7 , 1);
131
+    BF0( 8, 23, COS0_8 , 1);
132
+    /* pass 2 */
133
+    BF( 7,  8, COS1_7 , 4);
134
+    BF(23, 24,-COS1_7 , 4);
135
+    /* pass 3 */
136
+    BF( 0,  7, COS2_0 , 1);
137
+    BF( 8, 15,-COS2_0 , 1);
138
+    BF(16, 23, COS2_0 , 1);
139
+    BF(24, 31,-COS2_0 , 1);
140
+    /* pass 1 */
141
+    BF0( 3, 28, COS0_3 , 1);
142
+    BF0(12, 19, COS0_12, 2);
143
+    /* pass 2 */
144
+    BF( 3, 12, COS1_3 , 1);
145
+    BF(19, 28,-COS1_3 , 1);
146
+    /* pass 1 */
147
+    BF0( 4, 27, COS0_4 , 1);
148
+    BF0(11, 20, COS0_11, 2);
149
+    /* pass 2 */
150
+    BF( 4, 11, COS1_4 , 1);
151
+    BF(20, 27,-COS1_4 , 1);
152
+    /* pass 3 */
153
+    BF( 3,  4, COS2_3 , 3);
154
+    BF(11, 12,-COS2_3 , 3);
155
+    BF(19, 20, COS2_3 , 3);
156
+    BF(27, 28,-COS2_3 , 3);
157
+    /* pass 4 */
158
+    BF( 0,  3, COS3_0 , 1);
159
+    BF( 4,  7,-COS3_0 , 1);
160
+    BF( 8, 11, COS3_0 , 1);
161
+    BF(12, 15,-COS3_0 , 1);
162
+    BF(16, 19, COS3_0 , 1);
163
+    BF(20, 23,-COS3_0 , 1);
164
+    BF(24, 27, COS3_0 , 1);
165
+    BF(28, 31,-COS3_0 , 1);
166
+
167
+
168
+
169
+    /* pass 1 */
170
+    BF0( 1, 30, COS0_1 , 1);
171
+    BF0(14, 17, COS0_14, 3);
172
+    /* pass 2 */
173
+    BF( 1, 14, COS1_1 , 1);
174
+    BF(17, 30,-COS1_1 , 1);
175
+    /* pass 1 */
176
+    BF0( 6, 25, COS0_6 , 1);
177
+    BF0( 9, 22, COS0_9 , 1);
178
+    /* pass 2 */
179
+    BF( 6,  9, COS1_6 , 2);
180
+    BF(22, 25,-COS1_6 , 2);
181
+    /* pass 3 */
182
+    BF( 1,  6, COS2_1 , 1);
183
+    BF( 9, 14,-COS2_1 , 1);
184
+    BF(17, 22, COS2_1 , 1);
185
+    BF(25, 30,-COS2_1 , 1);
186
+
187
+    /* pass 1 */
188
+    BF0( 2, 29, COS0_2 , 1);
189
+    BF0(13, 18, COS0_13, 3);
190
+    /* pass 2 */
191
+    BF( 2, 13, COS1_2 , 1);
192
+    BF(18, 29,-COS1_2 , 1);
193
+    /* pass 1 */
194
+    BF0( 5, 26, COS0_5 , 1);
195
+    BF0(10, 21, COS0_10, 1);
196
+    /* pass 2 */
197
+    BF( 5, 10, COS1_5 , 2);
198
+    BF(21, 26,-COS1_5 , 2);
199
+    /* pass 3 */
200
+    BF( 2,  5, COS2_2 , 1);
201
+    BF(10, 13,-COS2_2 , 1);
202
+    BF(18, 21, COS2_2 , 1);
203
+    BF(26, 29,-COS2_2 , 1);
204
+    /* pass 4 */
205
+    BF( 1,  2, COS3_1 , 2);
206
+    BF( 5,  6,-COS3_1 , 2);
207
+    BF( 9, 10, COS3_1 , 2);
208
+    BF(13, 14,-COS3_1 , 2);
209
+    BF(17, 18, COS3_1 , 2);
210
+    BF(21, 22,-COS3_1 , 2);
211
+    BF(25, 26, COS3_1 , 2);
212
+    BF(29, 30,-COS3_1 , 2);
213
+
214
+    /* pass 5 */
215
+    BF1( 0,  1,  2,  3);
216
+    BF2( 4,  5,  6,  7);
217
+    BF1( 8,  9, 10, 11);
218
+    BF2(12, 13, 14, 15);
219
+    BF1(16, 17, 18, 19);
220
+    BF2(20, 21, 22, 23);
221
+    BF1(24, 25, 26, 27);
222
+    BF2(28, 29, 30, 31);
223
+
224
+    /* pass 6 */
225
+
226
+    ADD( 8, 12);
227
+    ADD(12, 10);
228
+    ADD(10, 14);
229
+    ADD(14,  9);
230
+    ADD( 9, 13);
231
+    ADD(13, 11);
232
+    ADD(11, 15);
233
+
234
+    out[ 0] = val0;
235
+    out[16] = val1;
236
+    out[ 8] = val2;
237
+    out[24] = val3;
238
+    out[ 4] = val4;
239
+    out[20] = val5;
240
+    out[12] = val6;
241
+    out[28] = val7;
242
+    out[ 2] = val8;
243
+    out[18] = val9;
244
+    out[10] = val10;
245
+    out[26] = val11;
246
+    out[ 6] = val12;
247
+    out[22] = val13;
248
+    out[14] = val14;
249
+    out[30] = val15;
250
+
251
+    ADD(24, 28);
252
+    ADD(28, 26);
253
+    ADD(26, 30);
254
+    ADD(30, 25);
255
+    ADD(25, 29);
256
+    ADD(29, 27);
257
+    ADD(27, 31);
258
+
259
+    out[ 1] = val16 + val24;
260
+    out[17] = val17 + val25;
261
+    out[ 9] = val18 + val26;
262
+    out[25] = val19 + val27;
263
+    out[ 5] = val20 + val28;
264
+    out[21] = val21 + val29;
265
+    out[13] = val22 + val30;
266
+    out[29] = val23 + val31;
267
+    out[ 3] = val24 + val20;
268
+    out[19] = val25 + val21;
269
+    out[11] = val26 + val22;
270
+    out[27] = val27 + val23;
271
+    out[ 7] = val28 + val18;
272
+    out[23] = val29 + val19;
273
+    out[15] = val30 + val17;
274
+    out[31] = val31;
275
+}
0 276
deleted file mode 100644
... ...
@@ -1,352 +0,0 @@
1
-/*
2
- * FFT/IFFT transforms
3
- * Copyright (c) 2008 Loren Merritt
4
- * Copyright (c) 2002 Fabrice Bellard
5
- * Partly based on libdjbfft by D. J. Bernstein
6
- *
7
- * This file is part of Libav.
8
- *
9
- * Libav is free software; you can redistribute it and/or
10
- * modify it under the terms of the GNU Lesser General Public
11
- * License as published by the Free Software Foundation; either
12
- * version 2.1 of the License, or (at your option) any later version.
13
- *
14
- * Libav is distributed in the hope that it will be useful,
15
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
- * Lesser General Public License for more details.
18
- *
19
- * You should have received a copy of the GNU Lesser General Public
20
- * License along with Libav; if not, write to the Free Software
21
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22
- */
23
-
24
-/**
25
- * @file
26
- * FFT/IFFT transforms.
27
- */
28
-
29
-#include <stdlib.h>
30
-#include <string.h>
31
-#include "libavutil/mathematics.h"
32
-#include "fft.h"
33
-#include "fft-internal.h"
34
-
35
-/* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */
36
-#if !CONFIG_HARDCODED_TABLES
37
-COSTABLE(16);
38
-COSTABLE(32);
39
-COSTABLE(64);
40
-COSTABLE(128);
41
-COSTABLE(256);
42
-COSTABLE(512);
43
-COSTABLE(1024);
44
-COSTABLE(2048);
45
-COSTABLE(4096);
46
-COSTABLE(8192);
47
-COSTABLE(16384);
48
-COSTABLE(32768);
49
-COSTABLE(65536);
50
-#endif
51
-COSTABLE_CONST FFTSample * const FFT_NAME(ff_cos_tabs)[] = {
52
-    NULL, NULL, NULL, NULL,
53
-    FFT_NAME(ff_cos_16),
54
-    FFT_NAME(ff_cos_32),
55
-    FFT_NAME(ff_cos_64),
56
-    FFT_NAME(ff_cos_128),
57
-    FFT_NAME(ff_cos_256),
58
-    FFT_NAME(ff_cos_512),
59
-    FFT_NAME(ff_cos_1024),
60
-    FFT_NAME(ff_cos_2048),
61
-    FFT_NAME(ff_cos_4096),
62
-    FFT_NAME(ff_cos_8192),
63
-    FFT_NAME(ff_cos_16384),
64
-    FFT_NAME(ff_cos_32768),
65
-    FFT_NAME(ff_cos_65536),
66
-};
67
-
68
-static void fft_permute_c(FFTContext *s, FFTComplex *z);
69
-static void fft_calc_c(FFTContext *s, FFTComplex *z);
70
-
71
-static int split_radix_permutation(int i, int n, int inverse)
72
-{
73
-    int m;
74
-    if(n <= 2) return i&1;
75
-    m = n >> 1;
76
-    if(!(i&m))            return split_radix_permutation(i, m, inverse)*2;
77
-    m >>= 1;
78
-    if(inverse == !(i&m)) return split_radix_permutation(i, m, inverse)*4 + 1;
79
-    else                  return split_radix_permutation(i, m, inverse)*4 - 1;
80
-}
81
-
82
-av_cold void ff_init_ff_cos_tabs(int index)
83
-{
84
-#if !CONFIG_HARDCODED_TABLES
85
-    int i;
86
-    int m = 1<<index;
87
-    double freq = 2*M_PI/m;
88
-    FFTSample *tab = FFT_NAME(ff_cos_tabs)[index];
89
-    for(i=0; i<=m/4; i++)
90
-        tab[i] = FIX15(cos(i*freq));
91
-    for(i=1; i<m/4; i++)
92
-        tab[m/2-i] = tab[i];
93
-#endif
94
-}
95
-
96
-static const int avx_tab[] = {
97
-    0, 4, 1, 5, 8, 12, 9, 13, 2, 6, 3, 7, 10, 14, 11, 15
98
-};
99
-
100
-static int is_second_half_of_fft32(int i, int n)
101
-{
102
-    if (n <= 32)
103
-        return i >= 16;
104
-    else if (i < n/2)
105
-        return is_second_half_of_fft32(i, n/2);
106
-    else if (i < 3*n/4)
107
-        return is_second_half_of_fft32(i - n/2, n/4);
108
-    else
109
-        return is_second_half_of_fft32(i - 3*n/4, n/4);
110
-}
111
-
112
-static av_cold void fft_perm_avx(FFTContext *s)
113
-{
114
-    int i;
115
-    int n = 1 << s->nbits;
116
-
117
-    for (i = 0; i < n; i += 16) {
118
-        int k;
119
-        if (is_second_half_of_fft32(i, n)) {
120
-            for (k = 0; k < 16; k++)
121
-                s->revtab[-split_radix_permutation(i + k, n, s->inverse) & (n - 1)] =
122
-                    i + avx_tab[k];
123
-
124
-        } else {
125
-            for (k = 0; k < 16; k++) {
126
-                int j = i + k;
127
-                j = (j & ~7) | ((j >> 1) & 3) | ((j << 2) & 4);
128
-                s->revtab[-split_radix_permutation(i + k, n, s->inverse) & (n - 1)] = j;
129
-            }
130
-        }
131
-    }
132
-}
133
-
134
-av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
135
-{
136
-    int i, j, n;
137
-
138
-    if (nbits < 2 || nbits > 16)
139
-        goto fail;
140
-    s->nbits = nbits;
141
-    n = 1 << nbits;
142
-
143
-    s->revtab = av_malloc(n * sizeof(uint16_t));
144
-    if (!s->revtab)
145
-        goto fail;
146
-    s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
147
-    if (!s->tmp_buf)
148
-        goto fail;
149
-    s->inverse = inverse;
150
-    s->fft_permutation = FF_FFT_PERM_DEFAULT;
151
-
152
-    s->fft_permute = fft_permute_c;
153
-    s->fft_calc    = fft_calc_c;
154
-#if CONFIG_MDCT
155
-    s->imdct_calc  = ff_imdct_calc_c;
156
-    s->imdct_half  = ff_imdct_half_c;
157
-    s->mdct_calc   = ff_mdct_calc_c;
158
-#endif
159
-
160
-#if CONFIG_FFT_FLOAT
161
-    if (ARCH_ARM)     ff_fft_init_arm(s);
162
-    if (ARCH_PPC)     ff_fft_init_ppc(s);
163
-    if (ARCH_X86)     ff_fft_init_x86(s);
164
-    if (CONFIG_MDCT)  s->mdct_calcw = s->mdct_calc;
165
-#else
166
-    if (CONFIG_MDCT)  s->mdct_calcw = ff_mdct_calcw_c;
167
-    if (ARCH_ARM)     ff_fft_fixed_init_arm(s);
168
-#endif
169
-
170
-    for(j=4; j<=nbits; j++) {
171
-        ff_init_ff_cos_tabs(j);
172
-    }
173
-
174
-    if (s->fft_permutation == FF_FFT_PERM_AVX) {
175
-        fft_perm_avx(s);
176
-    } else {
177
-        for(i=0; i<n; i++) {
178
-            int j = i;
179
-            if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS)
180
-                j = (j&~3) | ((j>>1)&1) | ((j<<1)&2);
181
-            s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = j;
182
-        }
183
-    }
184
-
185
-    return 0;
186
- fail:
187
-    av_freep(&s->revtab);
188
-    av_freep(&s->tmp_buf);
189
-    return -1;
190
-}
191
-
192
-static void fft_permute_c(FFTContext *s, FFTComplex *z)
193
-{
194
-    int j, np;
195
-    const uint16_t *revtab = s->revtab;
196
-    np = 1 << s->nbits;
197
-    /* TODO: handle split-radix permute in a more optimal way, probably in-place */
198
-    for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j];
199
-    memcpy(z, s->tmp_buf, np * sizeof(FFTComplex));
200
-}
201
-
202
-av_cold void ff_fft_end(FFTContext *s)
203
-{
204
-    av_freep(&s->revtab);
205
-    av_freep(&s->tmp_buf);
206
-}
207
-
208
-#define BUTTERFLIES(a0,a1,a2,a3) {\
209
-    BF(t3, t5, t5, t1);\
210
-    BF(a2.re, a0.re, a0.re, t5);\
211
-    BF(a3.im, a1.im, a1.im, t3);\
212
-    BF(t4, t6, t2, t6);\
213
-    BF(a3.re, a1.re, a1.re, t4);\
214
-    BF(a2.im, a0.im, a0.im, t6);\
215
-}
216
-
217
-// force loading all the inputs before storing any.
218
-// this is slightly slower for small data, but avoids store->load aliasing
219
-// for addresses separated by large powers of 2.
220
-#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\
221
-    FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\
222
-    BF(t3, t5, t5, t1);\
223
-    BF(a2.re, a0.re, r0, t5);\
224
-    BF(a3.im, a1.im, i1, t3);\
225
-    BF(t4, t6, t2, t6);\
226
-    BF(a3.re, a1.re, r1, t4);\
227
-    BF(a2.im, a0.im, i0, t6);\
228
-}
229
-
230
-#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\
231
-    CMUL(t1, t2, a2.re, a2.im, wre, -wim);\
232
-    CMUL(t5, t6, a3.re, a3.im, wre,  wim);\
233
-    BUTTERFLIES(a0,a1,a2,a3)\
234
-}
235
-
236
-#define TRANSFORM_ZERO(a0,a1,a2,a3) {\
237
-    t1 = a2.re;\
238
-    t2 = a2.im;\
239
-    t5 = a3.re;\
240
-    t6 = a3.im;\
241
-    BUTTERFLIES(a0,a1,a2,a3)\
242
-}
243
-
244
-/* z[0...8n-1], w[1...2n-1] */
245
-#define PASS(name)\
246
-static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\
247
-{\
248
-    FFTDouble t1, t2, t3, t4, t5, t6;\
249
-    int o1 = 2*n;\
250
-    int o2 = 4*n;\
251
-    int o3 = 6*n;\
252
-    const FFTSample *wim = wre+o1;\
253
-    n--;\
254
-\
255
-    TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\
256
-    TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
257
-    do {\
258
-        z += 2;\
259
-        wre += 2;\
260
-        wim -= 2;\
261
-        TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\
262
-        TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
263
-    } while(--n);\
264
-}
265
-
266
-PASS(pass)
267
-#undef BUTTERFLIES
268
-#define BUTTERFLIES BUTTERFLIES_BIG
269
-PASS(pass_big)
270
-
271
-#define DECL_FFT(n,n2,n4)\
272
-static void fft##n(FFTComplex *z)\
273
-{\
274
-    fft##n2(z);\
275
-    fft##n4(z+n4*2);\
276
-    fft##n4(z+n4*3);\
277
-    pass(z,FFT_NAME(ff_cos_##n),n4/2);\
278
-}
279
-
280
-static void fft4(FFTComplex *z)
281
-{
282
-    FFTDouble t1, t2, t3, t4, t5, t6, t7, t8;
283
-
284
-    BF(t3, t1, z[0].re, z[1].re);
285
-    BF(t8, t6, z[3].re, z[2].re);
286
-    BF(z[2].re, z[0].re, t1, t6);
287
-    BF(t4, t2, z[0].im, z[1].im);
288
-    BF(t7, t5, z[2].im, z[3].im);
289
-    BF(z[3].im, z[1].im, t4, t8);
290
-    BF(z[3].re, z[1].re, t3, t7);
291
-    BF(z[2].im, z[0].im, t2, t5);
292
-}
293
-
294
-static void fft8(FFTComplex *z)
295
-{
296
-    FFTDouble t1, t2, t3, t4, t5, t6;
297
-
298
-    fft4(z);
299
-
300
-    BF(t1, z[5].re, z[4].re, -z[5].re);
301
-    BF(t2, z[5].im, z[4].im, -z[5].im);
302
-    BF(t5, z[7].re, z[6].re, -z[7].re);
303
-    BF(t6, z[7].im, z[6].im, -z[7].im);
304
-
305
-    BUTTERFLIES(z[0],z[2],z[4],z[6]);
306
-    TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf);
307
-}
308
-
309
-#if !CONFIG_SMALL
310
-static void fft16(FFTComplex *z)
311
-{
312
-    FFTDouble t1, t2, t3, t4, t5, t6;
313
-    FFTSample cos_16_1 = FFT_NAME(ff_cos_16)[1];
314
-    FFTSample cos_16_3 = FFT_NAME(ff_cos_16)[3];
315
-
316
-    fft8(z);
317
-    fft4(z+8);
318
-    fft4(z+12);
319
-
320
-    TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
321
-    TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf);
322
-    TRANSFORM(z[1],z[5],z[9],z[13],cos_16_1,cos_16_3);
323
-    TRANSFORM(z[3],z[7],z[11],z[15],cos_16_3,cos_16_1);
324
-}
325
-#else
326
-DECL_FFT(16,8,4)
327
-#endif
328
-DECL_FFT(32,16,8)
329
-DECL_FFT(64,32,16)
330
-DECL_FFT(128,64,32)
331
-DECL_FFT(256,128,64)
332
-DECL_FFT(512,256,128)
333
-#if !CONFIG_SMALL
334
-#define pass pass_big
335
-#endif
336
-DECL_FFT(1024,512,256)
337
-DECL_FFT(2048,1024,512)
338
-DECL_FFT(4096,2048,1024)
339
-DECL_FFT(8192,4096,2048)
340
-DECL_FFT(16384,8192,4096)
341
-DECL_FFT(32768,16384,8192)
342
-DECL_FFT(65536,32768,16384)
343
-
344
-static void (* const fft_dispatch[])(FFTComplex*) = {
345
-    fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024,
346
-    fft2048, fft4096, fft8192, fft16384, fft32768, fft65536,
347
-};
348
-
349
-static void fft_calc_c(FFTContext *s, FFTComplex *z)
350
-{
351
-    fft_dispatch[s->nbits-2](z);
352
-}
... ...
@@ -17,4 +17,4 @@
17 17
  */
18 18
 
19 19
 #define CONFIG_FFT_FLOAT 0
20
-#include "fft.c"
20
+#include "fft_template.c"
... ...
@@ -17,4 +17,4 @@
17 17
  */
18 18
 
19 19
 #define CONFIG_FFT_FLOAT 1
20
-#include "fft.c"
20
+#include "fft_template.c"
21 21
new file mode 100644
... ...
@@ -0,0 +1,352 @@
0
+/*
1
+ * FFT/IFFT transforms
2
+ * Copyright (c) 2008 Loren Merritt
3
+ * Copyright (c) 2002 Fabrice Bellard
4
+ * Partly based on libdjbfft by D. J. Bernstein
5
+ *
6
+ * This file is part of Libav.
7
+ *
8
+ * Libav is free software; you can redistribute it and/or
9
+ * modify it under the terms of the GNU Lesser General Public
10
+ * License as published by the Free Software Foundation; either
11
+ * version 2.1 of the License, or (at your option) any later version.
12
+ *
13
+ * Libav is distributed in the hope that it will be useful,
14
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
+ * Lesser General Public License for more details.
17
+ *
18
+ * You should have received a copy of the GNU Lesser General Public
19
+ * License along with Libav; if not, write to the Free Software
20
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
+ */
22
+
23
+/**
24
+ * @file
25
+ * FFT/IFFT transforms.
26
+ */
27
+
28
+#include <stdlib.h>
29
+#include <string.h>
30
+#include "libavutil/mathematics.h"
31
+#include "fft.h"
32
+#include "fft-internal.h"
33
+
34
+/* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */
35
+#if !CONFIG_HARDCODED_TABLES
36
+COSTABLE(16);
37
+COSTABLE(32);
38
+COSTABLE(64);
39
+COSTABLE(128);
40
+COSTABLE(256);
41
+COSTABLE(512);
42
+COSTABLE(1024);
43
+COSTABLE(2048);
44
+COSTABLE(4096);
45
+COSTABLE(8192);
46
+COSTABLE(16384);
47
+COSTABLE(32768);
48
+COSTABLE(65536);
49
+#endif
50
+COSTABLE_CONST FFTSample * const FFT_NAME(ff_cos_tabs)[] = {
51
+    NULL, NULL, NULL, NULL,
52
+    FFT_NAME(ff_cos_16),
53
+    FFT_NAME(ff_cos_32),
54
+    FFT_NAME(ff_cos_64),
55
+    FFT_NAME(ff_cos_128),
56
+    FFT_NAME(ff_cos_256),
57
+    FFT_NAME(ff_cos_512),
58
+    FFT_NAME(ff_cos_1024),
59
+    FFT_NAME(ff_cos_2048),
60
+    FFT_NAME(ff_cos_4096),
61
+    FFT_NAME(ff_cos_8192),
62
+    FFT_NAME(ff_cos_16384),
63
+    FFT_NAME(ff_cos_32768),
64
+    FFT_NAME(ff_cos_65536),
65
+};
66
+
67
+static void fft_permute_c(FFTContext *s, FFTComplex *z);
68
+static void fft_calc_c(FFTContext *s, FFTComplex *z);
69
+
70
+static int split_radix_permutation(int i, int n, int inverse)
71
+{
72
+    int m;
73
+    if(n <= 2) return i&1;
74
+    m = n >> 1;
75
+    if(!(i&m))            return split_radix_permutation(i, m, inverse)*2;
76
+    m >>= 1;
77
+    if(inverse == !(i&m)) return split_radix_permutation(i, m, inverse)*4 + 1;
78
+    else                  return split_radix_permutation(i, m, inverse)*4 - 1;
79
+}
80
+
81
+av_cold void ff_init_ff_cos_tabs(int index)
82
+{
83
+#if !CONFIG_HARDCODED_TABLES
84
+    int i;
85
+    int m = 1<<index;
86
+    double freq = 2*M_PI/m;
87
+    FFTSample *tab = FFT_NAME(ff_cos_tabs)[index];
88
+    for(i=0; i<=m/4; i++)
89
+        tab[i] = FIX15(cos(i*freq));
90
+    for(i=1; i<m/4; i++)
91
+        tab[m/2-i] = tab[i];
92
+#endif
93
+}
94
+
95
+static const int avx_tab[] = {
96
+    0, 4, 1, 5, 8, 12, 9, 13, 2, 6, 3, 7, 10, 14, 11, 15
97
+};
98
+
99
+static int is_second_half_of_fft32(int i, int n)
100
+{
101
+    if (n <= 32)
102
+        return i >= 16;
103
+    else if (i < n/2)
104
+        return is_second_half_of_fft32(i, n/2);
105
+    else if (i < 3*n/4)
106
+        return is_second_half_of_fft32(i - n/2, n/4);
107
+    else
108
+        return is_second_half_of_fft32(i - 3*n/4, n/4);
109
+}
110
+
111
+static av_cold void fft_perm_avx(FFTContext *s)
112
+{
113
+    int i;
114
+    int n = 1 << s->nbits;
115
+
116
+    for (i = 0; i < n; i += 16) {
117
+        int k;
118
+        if (is_second_half_of_fft32(i, n)) {
119
+            for (k = 0; k < 16; k++)
120
+                s->revtab[-split_radix_permutation(i + k, n, s->inverse) & (n - 1)] =
121
+                    i + avx_tab[k];
122
+
123
+        } else {
124
+            for (k = 0; k < 16; k++) {
125
+                int j = i + k;
126
+                j = (j & ~7) | ((j >> 1) & 3) | ((j << 2) & 4);
127
+                s->revtab[-split_radix_permutation(i + k, n, s->inverse) & (n - 1)] = j;
128
+            }
129
+        }
130
+    }
131
+}
132
+
133
+av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
134
+{
135
+    int i, j, n;
136
+
137
+    if (nbits < 2 || nbits > 16)
138
+        goto fail;
139
+    s->nbits = nbits;
140
+    n = 1 << nbits;
141
+
142
+    s->revtab = av_malloc(n * sizeof(uint16_t));
143
+    if (!s->revtab)
144
+        goto fail;
145
+    s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
146
+    if (!s->tmp_buf)
147
+        goto fail;
148
+    s->inverse = inverse;
149
+    s->fft_permutation = FF_FFT_PERM_DEFAULT;
150
+
151
+    s->fft_permute = fft_permute_c;
152
+    s->fft_calc    = fft_calc_c;
153
+#if CONFIG_MDCT
154
+    s->imdct_calc  = ff_imdct_calc_c;
155
+    s->imdct_half  = ff_imdct_half_c;
156
+    s->mdct_calc   = ff_mdct_calc_c;
157
+#endif
158
+
159
+#if CONFIG_FFT_FLOAT
160
+    if (ARCH_ARM)     ff_fft_init_arm(s);
161
+    if (ARCH_PPC)     ff_fft_init_ppc(s);
162
+    if (ARCH_X86)     ff_fft_init_x86(s);
163
+    if (CONFIG_MDCT)  s->mdct_calcw = s->mdct_calc;
164
+#else
165
+    if (CONFIG_MDCT)  s->mdct_calcw = ff_mdct_calcw_c;
166
+    if (ARCH_ARM)     ff_fft_fixed_init_arm(s);
167
+#endif
168
+
169
+    for(j=4; j<=nbits; j++) {
170
+        ff_init_ff_cos_tabs(j);
171
+    }
172
+
173
+    if (s->fft_permutation == FF_FFT_PERM_AVX) {
174
+        fft_perm_avx(s);
175
+    } else {
176
+        for(i=0; i<n; i++) {
177
+            int j = i;
178
+            if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS)
179
+                j = (j&~3) | ((j>>1)&1) | ((j<<1)&2);
180
+            s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = j;
181
+        }
182
+    }
183
+
184
+    return 0;
185
+ fail:
186
+    av_freep(&s->revtab);
187
+    av_freep(&s->tmp_buf);
188
+    return -1;
189
+}
190
+
191
+static void fft_permute_c(FFTContext *s, FFTComplex *z)
192
+{
193
+    int j, np;
194
+    const uint16_t *revtab = s->revtab;
195
+    np = 1 << s->nbits;
196
+    /* TODO: handle split-radix permute in a more optimal way, probably in-place */
197
+    for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j];
198
+    memcpy(z, s->tmp_buf, np * sizeof(FFTComplex));
199
+}
200
+
201
+av_cold void ff_fft_end(FFTContext *s)
202
+{
203
+    av_freep(&s->revtab);
204
+    av_freep(&s->tmp_buf);
205
+}
206
+
207
+#define BUTTERFLIES(a0,a1,a2,a3) {\
208
+    BF(t3, t5, t5, t1);\
209
+    BF(a2.re, a0.re, a0.re, t5);\
210
+    BF(a3.im, a1.im, a1.im, t3);\
211
+    BF(t4, t6, t2, t6);\
212
+    BF(a3.re, a1.re, a1.re, t4);\
213
+    BF(a2.im, a0.im, a0.im, t6);\
214
+}
215
+
216
+// force loading all the inputs before storing any.
217
+// this is slightly slower for small data, but avoids store->load aliasing
218
+// for addresses separated by large powers of 2.
219
+#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\
220
+    FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\
221
+    BF(t3, t5, t5, t1);\
222
+    BF(a2.re, a0.re, r0, t5);\
223
+    BF(a3.im, a1.im, i1, t3);\
224
+    BF(t4, t6, t2, t6);\
225
+    BF(a3.re, a1.re, r1, t4);\
226
+    BF(a2.im, a0.im, i0, t6);\
227
+}
228
+
229
+#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\
230
+    CMUL(t1, t2, a2.re, a2.im, wre, -wim);\
231
+    CMUL(t5, t6, a3.re, a3.im, wre,  wim);\
232
+    BUTTERFLIES(a0,a1,a2,a3)\
233
+}
234
+
235
+#define TRANSFORM_ZERO(a0,a1,a2,a3) {\
236
+    t1 = a2.re;\
237
+    t2 = a2.im;\
238
+    t5 = a3.re;\
239
+    t6 = a3.im;\
240
+    BUTTERFLIES(a0,a1,a2,a3)\
241
+}
242
+
243
+/* z[0...8n-1], w[1...2n-1] */
244
+#define PASS(name)\
245
+static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\
246
+{\
247
+    FFTDouble t1, t2, t3, t4, t5, t6;\
248
+    int o1 = 2*n;\
249
+    int o2 = 4*n;\
250
+    int o3 = 6*n;\
251
+    const FFTSample *wim = wre+o1;\
252
+    n--;\
253
+\
254
+    TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\
255
+    TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
256
+    do {\
257
+        z += 2;\
258
+        wre += 2;\
259
+        wim -= 2;\
260
+        TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\
261
+        TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
262
+    } while(--n);\
263
+}
264
+
265
+PASS(pass)
266
+#undef BUTTERFLIES
267
+#define BUTTERFLIES BUTTERFLIES_BIG
268
+PASS(pass_big)
269
+
270
+#define DECL_FFT(n,n2,n4)\
271
+static void fft##n(FFTComplex *z)\
272
+{\
273
+    fft##n2(z);\
274
+    fft##n4(z+n4*2);\
275
+    fft##n4(z+n4*3);\
276
+    pass(z,FFT_NAME(ff_cos_##n),n4/2);\
277
+}
278
+
279
+static void fft4(FFTComplex *z)
280
+{
281
+    FFTDouble t1, t2, t3, t4, t5, t6, t7, t8;
282
+
283
+    BF(t3, t1, z[0].re, z[1].re);
284
+    BF(t8, t6, z[3].re, z[2].re);
285
+    BF(z[2].re, z[0].re, t1, t6);
286
+    BF(t4, t2, z[0].im, z[1].im);
287
+    BF(t7, t5, z[2].im, z[3].im);
288
+    BF(z[3].im, z[1].im, t4, t8);
289
+    BF(z[3].re, z[1].re, t3, t7);
290
+    BF(z[2].im, z[0].im, t2, t5);
291
+}
292
+
293
+static void fft8(FFTComplex *z)
294
+{
295
+    FFTDouble t1, t2, t3, t4, t5, t6;
296
+
297
+    fft4(z);
298
+
299
+    BF(t1, z[5].re, z[4].re, -z[5].re);
300
+    BF(t2, z[5].im, z[4].im, -z[5].im);
301
+    BF(t5, z[7].re, z[6].re, -z[7].re);
302
+    BF(t6, z[7].im, z[6].im, -z[7].im);
303
+
304
+    BUTTERFLIES(z[0],z[2],z[4],z[6]);
305
+    TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf);
306
+}
307
+
308
+#if !CONFIG_SMALL
309
+static void fft16(FFTComplex *z)
310
+{
311
+    FFTDouble t1, t2, t3, t4, t5, t6;
312
+    FFTSample cos_16_1 = FFT_NAME(ff_cos_16)[1];
313
+    FFTSample cos_16_3 = FFT_NAME(ff_cos_16)[3];
314
+
315
+    fft8(z);
316
+    fft4(z+8);
317
+    fft4(z+12);
318
+
319
+    TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
320
+    TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf);
321
+    TRANSFORM(z[1],z[5],z[9],z[13],cos_16_1,cos_16_3);
322
+    TRANSFORM(z[3],z[7],z[11],z[15],cos_16_3,cos_16_1);
323
+}
324
+#else
325
+DECL_FFT(16,8,4)
326
+#endif
327
+DECL_FFT(32,16,8)
328
+DECL_FFT(64,32,16)
329
+DECL_FFT(128,64,32)
330
+DECL_FFT(256,128,64)
331
+DECL_FFT(512,256,128)
332
+#if !CONFIG_SMALL
333
+#define pass pass_big
334
+#endif
335
+DECL_FFT(1024,512,256)
336
+DECL_FFT(2048,1024,512)
337
+DECL_FFT(4096,2048,1024)
338
+DECL_FFT(8192,4096,2048)
339
+DECL_FFT(16384,8192,4096)
340
+DECL_FFT(32768,16384,8192)
341
+DECL_FFT(65536,32768,16384)
342
+
343
+static void (* const fft_dispatch[])(FFTComplex*) = {
344
+    fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024,
345
+    fft2048, fft4096, fft8192, fft16384, fft32768, fft65536,
346
+};
347
+
348
+static void fft_calc_c(FFTContext *s, FFTComplex *z)
349
+{
350
+    fft_dispatch[s->nbits-2](z);
351
+}
0 352
deleted file mode 100644
... ...
@@ -1,203 +0,0 @@
1
-/*
2
- * MDCT/IMDCT transforms
3
- * Copyright (c) 2002 Fabrice Bellard
4
- *
5
- * This file is part of Libav.
6
- *
7
- * Libav is free software; you can redistribute it and/or
8
- * modify it under the terms of the GNU Lesser General Public
9
- * License as published by the Free Software Foundation; either
10
- * version 2.1 of the License, or (at your option) any later version.
11
- *
12
- * Libav is distributed in the hope that it will be useful,
13
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
- * Lesser General Public License for more details.
16
- *
17
- * You should have received a copy of the GNU Lesser General Public
18
- * License along with Libav; if not, write to the Free Software
19
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
- */
21
-
22
-#include <stdlib.h>
23
-#include <string.h>
24
-#include "libavutil/common.h"
25
-#include "libavutil/mathematics.h"
26
-#include "fft.h"
27
-#include "fft-internal.h"
28
-
29
-/**
30
- * @file
31
- * MDCT/IMDCT transforms.
32
- */
33
-
34
-#if CONFIG_FFT_FLOAT
35
-#   define RSCALE(x) (x)
36
-#else
37
-#   define RSCALE(x) ((x) >> 1)
38
-#endif
39
-
40
-/**
41
- * init MDCT or IMDCT computation.
42
- */
43
-av_cold int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale)
44
-{
45
-    int n, n4, i;
46
-    double alpha, theta;
47
-    int tstep;
48
-
49
-    memset(s, 0, sizeof(*s));
50
-    n = 1 << nbits;
51
-    s->mdct_bits = nbits;
52
-    s->mdct_size = n;
53
-    n4 = n >> 2;
54
-    s->mdct_permutation = FF_MDCT_PERM_NONE;
55
-
56
-    if (ff_fft_init(s, s->mdct_bits - 2, inverse) < 0)
57
-        goto fail;
58
-
59
-    s->tcos = av_malloc(n/2 * sizeof(FFTSample));
60
-    if (!s->tcos)
61
-        goto fail;
62
-
63
-    switch (s->mdct_permutation) {
64
-    case FF_MDCT_PERM_NONE:
65
-        s->tsin = s->tcos + n4;
66
-        tstep = 1;
67
-        break;
68
-    case FF_MDCT_PERM_INTERLEAVE:
69
-        s->tsin = s->tcos + 1;
70
-        tstep = 2;
71
-        break;
72
-    default:
73
-        goto fail;
74
-    }
75
-
76
-    theta = 1.0 / 8.0 + (scale < 0 ? n4 : 0);
77
-    scale = sqrt(fabs(scale));
78
-    for(i=0;i<n4;i++) {
79
-        alpha = 2 * M_PI * (i + theta) / n;
80
-        s->tcos[i*tstep] = FIX15(-cos(alpha) * scale);
81
-        s->tsin[i*tstep] = FIX15(-sin(alpha) * scale);
82
-    }
83
-    return 0;
84
- fail:
85
-    ff_mdct_end(s);
86
-    return -1;
87
-}
88
-
89
-/**
90
- * Compute the middle half of the inverse MDCT of size N = 2^nbits,
91
- * thus excluding the parts that can be derived by symmetry
92
- * @param output N/2 samples
93
- * @param input N/2 samples
94
- */
95
-void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input)
96
-{
97
-    int k, n8, n4, n2, n, j;
98
-    const uint16_t *revtab = s->revtab;
99
-    const FFTSample *tcos = s->tcos;
100
-    const FFTSample *tsin = s->tsin;
101
-    const FFTSample *in1, *in2;
102
-    FFTComplex *z = (FFTComplex *)output;
103
-
104
-    n = 1 << s->mdct_bits;
105
-    n2 = n >> 1;
106
-    n4 = n >> 2;
107
-    n8 = n >> 3;
108
-
109
-    /* pre rotation */
110
-    in1 = input;
111
-    in2 = input + n2 - 1;
112
-    for(k = 0; k < n4; k++) {
113
-        j=revtab[k];
114
-        CMUL(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]);
115
-        in1 += 2;
116
-        in2 -= 2;
117
-    }
118
-    s->fft_calc(s, z);
119
-
120
-    /* post rotation + reordering */
121
-    for(k = 0; k < n8; k++) {
122
-        FFTSample r0, i0, r1, i1;
123
-        CMUL(r0, i1, z[n8-k-1].im, z[n8-k-1].re, tsin[n8-k-1], tcos[n8-k-1]);
124
-        CMUL(r1, i0, z[n8+k  ].im, z[n8+k  ].re, tsin[n8+k  ], tcos[n8+k  ]);
125
-        z[n8-k-1].re = r0;
126
-        z[n8-k-1].im = i0;
127
-        z[n8+k  ].re = r1;
128
-        z[n8+k  ].im = i1;
129
-    }
130
-}
131
-
132
-/**
133
- * Compute inverse MDCT of size N = 2^nbits
134
- * @param output N samples
135
- * @param input N/2 samples
136
- */
137
-void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input)
138
-{
139
-    int k;
140
-    int n = 1 << s->mdct_bits;
141
-    int n2 = n >> 1;
142
-    int n4 = n >> 2;
143
-
144
-    ff_imdct_half_c(s, output+n4, input);
145
-
146
-    for(k = 0; k < n4; k++) {
147
-        output[k] = -output[n2-k-1];
148
-        output[n-k-1] = output[n2+k];
149
-    }
150
-}
151
-
152
-/**
153
- * Compute MDCT of size N = 2^nbits
154
- * @param input N samples
155
- * @param out N/2 samples
156
- */
157
-void ff_mdct_calc_c(FFTContext *s, FFTSample *out, const FFTSample *input)
158
-{
159
-    int i, j, n, n8, n4, n2, n3;
160
-    FFTDouble re, im;
161
-    const uint16_t *revtab = s->revtab;
162
-    const FFTSample *tcos = s->tcos;
163
-    const FFTSample *tsin = s->tsin;
164
-    FFTComplex *x = (FFTComplex *)out;
165
-
166
-    n = 1 << s->mdct_bits;
167
-    n2 = n >> 1;
168
-    n4 = n >> 2;
169
-    n8 = n >> 3;
170
-    n3 = 3 * n4;
171
-
172
-    /* pre rotation */
173
-    for(i=0;i<n8;i++) {
174
-        re = RSCALE(-input[2*i+n3] - input[n3-1-2*i]);
175
-        im = RSCALE(-input[n4+2*i] + input[n4-1-2*i]);
176
-        j = revtab[i];
177
-        CMUL(x[j].re, x[j].im, re, im, -tcos[i], tsin[i]);
178
-
179
-        re = RSCALE( input[2*i]    - input[n2-1-2*i]);
180
-        im = RSCALE(-input[n2+2*i] - input[ n-1-2*i]);
181
-        j = revtab[n8 + i];
182
-        CMUL(x[j].re, x[j].im, re, im, -tcos[n8 + i], tsin[n8 + i]);
183
-    }
184
-
185
-    s->fft_calc(s, x);
186
-
187
-    /* post rotation */
188
-    for(i=0;i<n8;i++) {
189
-        FFTSample r0, i0, r1, i1;
190
-        CMUL(i1, r0, x[n8-i-1].re, x[n8-i-1].im, -tsin[n8-i-1], -tcos[n8-i-1]);
191
-        CMUL(i0, r1, x[n8+i  ].re, x[n8+i  ].im, -tsin[n8+i  ], -tcos[n8+i  ]);
192
-        x[n8-i-1].re = r0;
193
-        x[n8-i-1].im = i0;
194
-        x[n8+i  ].re = r1;
195
-        x[n8+i  ].im = i1;
196
-    }
197
-}
198
-
199
-av_cold void ff_mdct_end(FFTContext *s)
200
-{
201
-    av_freep(&s->tcos);
202
-    ff_fft_end(s);
203
-}
... ...
@@ -17,7 +17,7 @@
17 17
  */
18 18
 
19 19
 #define CONFIG_FFT_FLOAT 0
20
-#include "mdct.c"
20
+#include "mdct_template.c"
21 21
 
22 22
 /* same as ff_mdct_calcw_c with double-width unscaled output */
23 23
 void ff_mdct_calcw_c(FFTContext *s, FFTDouble *out, const FFTSample *input)
... ...
@@ -17,4 +17,4 @@
17 17
  */
18 18
 
19 19
 #define CONFIG_FFT_FLOAT 1
20
-#include "mdct.c"
20
+#include "mdct_template.c"
21 21
new file mode 100644
... ...
@@ -0,0 +1,203 @@
0
+/*
1
+ * MDCT/IMDCT transforms
2
+ * Copyright (c) 2002 Fabrice Bellard
3
+ *
4
+ * This file is part of Libav.
5
+ *
6
+ * Libav is free software; you can redistribute it and/or
7
+ * modify it under the terms of the GNU Lesser General Public
8
+ * License as published by the Free Software Foundation; either
9
+ * version 2.1 of the License, or (at your option) any later version.
10
+ *
11
+ * Libav is distributed in the hope that it will be useful,
12
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
+ * Lesser General Public License for more details.
15
+ *
16
+ * You should have received a copy of the GNU Lesser General Public
17
+ * License along with Libav; if not, write to the Free Software
18
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
+ */
20
+
21
+#include <stdlib.h>
22
+#include <string.h>
23
+#include "libavutil/common.h"
24
+#include "libavutil/mathematics.h"
25
+#include "fft.h"
26
+#include "fft-internal.h"
27
+
28
+/**
29
+ * @file
30
+ * MDCT/IMDCT transforms.
31
+ */
32
+
33
+#if CONFIG_FFT_FLOAT
34
+#   define RSCALE(x) (x)
35
+#else
36
+#   define RSCALE(x) ((x) >> 1)
37
+#endif
38
+
39
+/**
40
+ * init MDCT or IMDCT computation.
41
+ */
42
+av_cold int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale)
43
+{
44
+    int n, n4, i;
45
+    double alpha, theta;
46
+    int tstep;
47
+
48
+    memset(s, 0, sizeof(*s));
49
+    n = 1 << nbits;
50
+    s->mdct_bits = nbits;
51
+    s->mdct_size = n;
52
+    n4 = n >> 2;
53
+    s->mdct_permutation = FF_MDCT_PERM_NONE;
54
+
55
+    if (ff_fft_init(s, s->mdct_bits - 2, inverse) < 0)
56
+        goto fail;
57
+
58
+    s->tcos = av_malloc(n/2 * sizeof(FFTSample));
59
+    if (!s->tcos)
60
+        goto fail;
61
+
62
+    switch (s->mdct_permutation) {
63
+    case FF_MDCT_PERM_NONE:
64
+        s->tsin = s->tcos + n4;
65
+        tstep = 1;
66
+        break;
67
+    case FF_MDCT_PERM_INTERLEAVE:
68
+        s->tsin = s->tcos + 1;
69
+        tstep = 2;
70
+        break;
71
+    default:
72
+        goto fail;
73
+    }
74
+
75
+    theta = 1.0 / 8.0 + (scale < 0 ? n4 : 0);
76
+    scale = sqrt(fabs(scale));
77
+    for(i=0;i<n4;i++) {
78
+        alpha = 2 * M_PI * (i + theta) / n;
79
+        s->tcos[i*tstep] = FIX15(-cos(alpha) * scale);
80
+        s->tsin[i*tstep] = FIX15(-sin(alpha) * scale);
81
+    }
82
+    return 0;
83
+ fail:
84
+    ff_mdct_end(s);
85
+    return -1;
86
+}
87
+
88
+/**
89
+ * Compute the middle half of the inverse MDCT of size N = 2^nbits,
90
+ * thus excluding the parts that can be derived by symmetry
91
+ * @param output N/2 samples
92
+ * @param input N/2 samples
93
+ */
94
+void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input)
95
+{
96
+    int k, n8, n4, n2, n, j;
97
+    const uint16_t *revtab = s->revtab;
98
+    const FFTSample *tcos = s->tcos;
99
+    const FFTSample *tsin = s->tsin;
100
+    const FFTSample *in1, *in2;
101
+    FFTComplex *z = (FFTComplex *)output;
102
+
103
+    n = 1 << s->mdct_bits;
104
+    n2 = n >> 1;
105
+    n4 = n >> 2;
106
+    n8 = n >> 3;
107
+
108
+    /* pre rotation */
109
+    in1 = input;
110
+    in2 = input + n2 - 1;
111
+    for(k = 0; k < n4; k++) {
112
+        j=revtab[k];
113
+        CMUL(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]);
114
+        in1 += 2;
115
+        in2 -= 2;
116
+    }
117
+    s->fft_calc(s, z);
118
+
119
+    /* post rotation + reordering */
120
+    for(k = 0; k < n8; k++) {
121
+        FFTSample r0, i0, r1, i1;
122
+        CMUL(r0, i1, z[n8-k-1].im, z[n8-k-1].re, tsin[n8-k-1], tcos[n8-k-1]);
123
+        CMUL(r1, i0, z[n8+k  ].im, z[n8+k  ].re, tsin[n8+k  ], tcos[n8+k  ]);
124
+        z[n8-k-1].re = r0;
125
+        z[n8-k-1].im = i0;
126
+        z[n8+k  ].re = r1;
127
+        z[n8+k  ].im = i1;
128
+    }
129
+}
130
+
131
+/**
132
+ * Compute inverse MDCT of size N = 2^nbits
133
+ * @param output N samples
134
+ * @param input N/2 samples
135
+ */
136
+void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input)
137
+{
138
+    int k;
139
+    int n = 1 << s->mdct_bits;
140
+    int n2 = n >> 1;
141
+    int n4 = n >> 2;
142
+
143
+    ff_imdct_half_c(s, output+n4, input);
144
+
145
+    for(k = 0; k < n4; k++) {
146
+        output[k] = -output[n2-k-1];
147
+        output[n-k-1] = output[n2+k];
148
+    }
149
+}
150
+
151
+/**
152
+ * Compute MDCT of size N = 2^nbits
153
+ * @param input N samples
154
+ * @param out N/2 samples
155
+ */
156
+void ff_mdct_calc_c(FFTContext *s, FFTSample *out, const FFTSample *input)
157
+{
158
+    int i, j, n, n8, n4, n2, n3;
159
+    FFTDouble re, im;
160
+    const uint16_t *revtab = s->revtab;
161
+    const FFTSample *tcos = s->tcos;
162
+    const FFTSample *tsin = s->tsin;
163
+    FFTComplex *x = (FFTComplex *)out;
164
+
165
+    n = 1 << s->mdct_bits;
166
+    n2 = n >> 1;
167
+    n4 = n >> 2;
168
+    n8 = n >> 3;
169
+    n3 = 3 * n4;
170
+
171
+    /* pre rotation */
172
+    for(i=0;i<n8;i++) {
173
+        re = RSCALE(-input[2*i+n3] - input[n3-1-2*i]);
174
+        im = RSCALE(-input[n4+2*i] + input[n4-1-2*i]);
175
+        j = revtab[i];
176
+        CMUL(x[j].re, x[j].im, re, im, -tcos[i], tsin[i]);
177
+
178
+        re = RSCALE( input[2*i]    - input[n2-1-2*i]);
179
+        im = RSCALE(-input[n2+2*i] - input[ n-1-2*i]);
180
+        j = revtab[n8 + i];
181
+        CMUL(x[j].re, x[j].im, re, im, -tcos[n8 + i], tsin[n8 + i]);
182
+    }
183
+
184
+    s->fft_calc(s, x);
185
+
186
+    /* post rotation */
187
+    for(i=0;i<n8;i++) {
188
+        FFTSample r0, i0, r1, i1;
189
+        CMUL(i1, r0, x[n8-i-1].re, x[n8-i-1].im, -tsin[n8-i-1], -tcos[n8-i-1]);
190
+        CMUL(i0, r1, x[n8+i  ].re, x[n8+i  ].im, -tsin[n8+i  ], -tcos[n8+i  ]);
191
+        x[n8-i-1].re = r0;
192
+        x[n8-i-1].im = i0;
193
+        x[n8+i  ].re = r1;
194
+        x[n8+i  ].im = i1;
195
+    }
196
+}
197
+
198
+av_cold void ff_mdct_end(FFTContext *s)
199
+{
200
+    av_freep(&s->tcos);
201
+    ff_fft_end(s);
202
+}