The MIPS FFT implementation works iteratively instead of recursively
calling functions for smaller FFT sizes.
Some of DSP and format convert utils functions are also optimized.
Signed-off-by: Nedeljko Babic <nbabic@mips.com>
Reviewed-by: Vitor Sessak <vitor1001@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
... | ... |
@@ -3173,6 +3173,7 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) |
3173 | 3173 |
if (HAVE_MMI) ff_dsputil_init_mmi (c, avctx); |
3174 | 3174 |
if (ARCH_SH4) ff_dsputil_init_sh4 (c, avctx); |
3175 | 3175 |
if (ARCH_BFIN) ff_dsputil_init_bfin (c, avctx); |
3176 |
+ if (HAVE_MIPSFPU) ff_dsputil_init_mips (c, avctx); |
|
3176 | 3177 |
|
3177 | 3178 |
for (i = 0; i < 4; i++) { |
3178 | 3179 |
for (j = 0; j < 16; j++) { |
... | ... |
@@ -622,6 +622,7 @@ void ff_dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx); |
622 | 622 |
void ff_dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx); |
623 | 623 |
void ff_dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx); |
624 | 624 |
void ff_dsputil_init_vis(DSPContext* c, AVCodecContext *avctx); |
625 |
+void ff_dsputil_init_mips(DSPContext* c, AVCodecContext *avctx); |
|
625 | 626 |
|
626 | 627 |
void ff_dsputil_init_dwt(DSPContext *c); |
627 | 628 |
void ff_mlp_init(DSPContext* c, AVCodecContext *avctx); |
... | ... |
@@ -162,6 +162,7 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse) |
162 | 162 |
if (HAVE_ALTIVEC) ff_fft_init_altivec(s); |
163 | 163 |
if (HAVE_MMX) ff_fft_init_mmx(s); |
164 | 164 |
if (CONFIG_MDCT) s->mdct_calcw = s->mdct_calc; |
165 |
+ if (HAVE_MIPSFPU) ff_fft_init_mips(s); |
|
165 | 166 |
#else |
166 | 167 |
if (CONFIG_MDCT) s->mdct_calcw = ff_mdct_calcw_c; |
167 | 168 |
if (ARCH_ARM) ff_fft_fixed_init_arm(s); |
... | ... |
@@ -137,6 +137,7 @@ int ff_fft_init(FFTContext *s, int nbits, int inverse); |
137 | 137 |
void ff_fft_init_altivec(FFTContext *s); |
138 | 138 |
void ff_fft_init_mmx(FFTContext *s); |
139 | 139 |
void ff_fft_init_arm(FFTContext *s); |
140 |
+void ff_fft_init_mips(FFTContext *s); |
|
140 | 141 |
#else |
141 | 142 |
void ff_fft_fixed_init_arm(FFTContext *s); |
142 | 143 |
#endif |
... | ... |
@@ -86,6 +86,7 @@ av_cold void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx) |
86 | 86 |
if (ARCH_ARM) ff_fmt_convert_init_arm(c, avctx); |
87 | 87 |
if (HAVE_ALTIVEC) ff_fmt_convert_init_altivec(c, avctx); |
88 | 88 |
if (HAVE_MMX) ff_fmt_convert_init_x86(c, avctx); |
89 |
+ if (HAVE_MIPSFPU) ff_fmt_convert_init_mips(c); |
|
89 | 90 |
} |
90 | 91 |
|
91 | 92 |
/* ffdshow custom code */ |
... | ... |
@@ -92,6 +92,7 @@ av_cold void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx); |
92 | 92 |
void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx); |
93 | 93 |
void ff_fmt_convert_init_altivec(FmtConvertContext *c, AVCodecContext *avctx); |
94 | 94 |
void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx); |
95 |
+void ff_fmt_convert_init_mips(FmtConvertContext *c); |
|
95 | 96 |
|
96 | 97 |
/* ffdshow custom code */ |
97 | 98 |
void float_interleave(float *dst, const float **src, long len, int channels); |
... | ... |
@@ -13,3 +13,7 @@ MIPSFPU-OBJS-$(CONFIG_AMRWB_DECODER) += mips/acelp_filters_mips.o \ |
13 | 13 |
mips/acelp_vectors_mips.o |
14 | 14 |
MIPSFPU-OBJS-$(CONFIG_MPEGAUDIODSP) += mips/mpegaudiodsp_mips_float.o |
15 | 15 |
MIPSDSPR1-OBJS-$(CONFIG_MPEGAUDIODSP) += mips/mpegaudiodsp_mips_fixed.o |
16 |
+OBJS-$(CONFIG_FFT) += mips/fft_init_table.o |
|
17 |
+MIPSFPU-OBJS-$(CONFIG_FFT) += mips/fft_mips.o |
|
18 |
+MIPSFPU-OBJS-$(HAVE_INLINE_ASM) += mips/fmtconvert_mips.o |
|
19 |
+MIPSFPU-OBJS-$(HAVE_INLINE_ASM) += mips/dsputil_mips.o |
16 | 20 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,164 @@ |
0 |
+/* |
|
1 |
+ * Copyright (c) 2012 |
|
2 |
+ * MIPS Technologies, Inc., California. |
|
3 |
+ * |
|
4 |
+ * Redistribution and use in source and binary forms, with or without |
|
5 |
+ * modification, are permitted provided that the following conditions |
|
6 |
+ * are met: |
|
7 |
+ * 1. Redistributions of source code must retain the above copyright |
|
8 |
+ * notice, this list of conditions and the following disclaimer. |
|
9 |
+ * 2. Redistributions in binary form must reproduce the above copyright |
|
10 |
+ * notice, this list of conditions and the following disclaimer in the |
|
11 |
+ * documentation and/or other materials provided with the distribution. |
|
12 |
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its |
|
13 |
+ * contributors may be used to endorse or promote products derived from |
|
14 |
+ * this software without specific prior written permission. |
|
15 |
+ * |
|
16 |
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND |
|
17 |
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
18 |
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
19 |
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE |
|
20 |
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|
21 |
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
|
22 |
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
|
23 |
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
|
24 |
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
|
25 |
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
26 |
+ * SUCH DAMAGE. |
|
27 |
+ * |
|
28 |
+ * Author: Zoran Lukic (zoranl@mips.com) |
|
29 |
+ * |
|
30 |
+ * This file is part of FFmpeg. |
|
31 |
+ * |
|
32 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
33 |
+ * modify it under the terms of the GNU Lesser General Public |
|
34 |
+ * License as published by the Free Software Foundation; either |
|
35 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
36 |
+ * |
|
37 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
38 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
39 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
40 |
+ * Lesser General Public License for more details. |
|
41 |
+ * |
|
42 |
+ * You should have received a copy of the GNU Lesser General Public |
|
43 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
44 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
45 |
+ */ |
|
46 |
+#include "config.h" |
|
47 |
+#include "libavcodec/dsputil.h" |
|
48 |
+ |
|
49 |
+static void vector_fmul_window_mips(float *dst, const float *src0, |
|
50 |
+ const float *src1, const float *win, int len) |
|
51 |
+{ |
|
52 |
+ int i, j; |
|
53 |
+ /* |
|
54 |
+ * variables used in inline assembler |
|
55 |
+ */ |
|
56 |
+ float * dst_i, * dst_j, * dst_i2, * dst_j2; |
|
57 |
+ float temp, temp1, temp2, temp3, temp4, temp5, temp6, temp7; |
|
58 |
+ |
|
59 |
+ dst += len; |
|
60 |
+ win += len; |
|
61 |
+ src0 += len; |
|
62 |
+ |
|
63 |
+ for (i = -len, j = len - 1; i < 0; i += 8, j -= 8) { |
|
64 |
+ |
|
65 |
+ dst_i = dst + i; |
|
66 |
+ dst_j = dst + j; |
|
67 |
+ |
|
68 |
+ dst_i2 = dst + i + 4; |
|
69 |
+ dst_j2 = dst + j - 4; |
|
70 |
+ |
|
71 |
+ __asm__ volatile ( |
|
72 |
+ "mul.s %[temp], %[s1], %[wi] \n\t" |
|
73 |
+ "mul.s %[temp1], %[s1], %[wj] \n\t" |
|
74 |
+ "mul.s %[temp2], %[s11], %[wi1] \n\t" |
|
75 |
+ "mul.s %[temp3], %[s11], %[wj1] \n\t" |
|
76 |
+ |
|
77 |
+ "msub.s %[temp], %[temp], %[s0], %[wj] \n\t" |
|
78 |
+ "madd.s %[temp1], %[temp1], %[s0], %[wi] \n\t" |
|
79 |
+ "msub.s %[temp2], %[temp2], %[s01], %[wj1] \n\t" |
|
80 |
+ "madd.s %[temp3], %[temp3], %[s01], %[wi1] \n\t" |
|
81 |
+ |
|
82 |
+ "swc1 %[temp], 0(%[dst_i]) \n\t" /* dst[i] = s0*wj - s1*wi; */ |
|
83 |
+ "swc1 %[temp1], 0(%[dst_j]) \n\t" /* dst[j] = s0*wi + s1*wj; */ |
|
84 |
+ "swc1 %[temp2], 4(%[dst_i]) \n\t" /* dst[i+1] = s01*wj1 - s11*wi1; */ |
|
85 |
+ "swc1 %[temp3], -4(%[dst_j]) \n\t" /* dst[j-1] = s01*wi1 + s11*wj1; */ |
|
86 |
+ |
|
87 |
+ "mul.s %[temp4], %[s12], %[wi2] \n\t" |
|
88 |
+ "mul.s %[temp5], %[s12], %[wj2] \n\t" |
|
89 |
+ "mul.s %[temp6], %[s13], %[wi3] \n\t" |
|
90 |
+ "mul.s %[temp7], %[s13], %[wj3] \n\t" |
|
91 |
+ |
|
92 |
+ "msub.s %[temp4], %[temp4], %[s02], %[wj2] \n\t" |
|
93 |
+ "madd.s %[temp5], %[temp5], %[s02], %[wi2] \n\t" |
|
94 |
+ "msub.s %[temp6], %[temp6], %[s03], %[wj3] \n\t" |
|
95 |
+ "madd.s %[temp7], %[temp7], %[s03], %[wi3] \n\t" |
|
96 |
+ |
|
97 |
+ "swc1 %[temp4], 8(%[dst_i]) \n\t" /* dst[i+2] = s02*wj2 - s12*wi2; */ |
|
98 |
+ "swc1 %[temp5], -8(%[dst_j]) \n\t" /* dst[j-2] = s02*wi2 + s12*wj2; */ |
|
99 |
+ "swc1 %[temp6], 12(%[dst_i]) \n\t" /* dst[i+2] = s03*wj3 - s13*wi3; */ |
|
100 |
+ "swc1 %[temp7], -12(%[dst_j]) \n\t" /* dst[j-3] = s03*wi3 + s13*wj3; */ |
|
101 |
+ : [temp]"=&f"(temp), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2), |
|
102 |
+ [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5), |
|
103 |
+ [temp6]"=&f"(temp6), [temp7]"=&f"(temp7) |
|
104 |
+ : [dst_j]"r"(dst_j), [dst_i]"r" (dst_i), |
|
105 |
+ [s0] "f"(src0[i]), [wj] "f"(win[j]), [s1] "f"(src1[j]), |
|
106 |
+ [wi] "f"(win[i]), [s01]"f"(src0[i + 1]),[wj1]"f"(win[j - 1]), |
|
107 |
+ [s11]"f"(src1[j - 1]), [wi1]"f"(win[i + 1]), [s02]"f"(src0[i + 2]), |
|
108 |
+ [wj2]"f"(win[j - 2]), [s12]"f"(src1[j - 2]),[wi2]"f"(win[i + 2]), |
|
109 |
+ [s03]"f"(src0[i + 3]), [wj3]"f"(win[j - 3]), [s13]"f"(src1[j - 3]), |
|
110 |
+ [wi3]"f"(win[i + 3]) |
|
111 |
+ : "memory" |
|
112 |
+ ); |
|
113 |
+ |
|
114 |
+ __asm__ volatile ( |
|
115 |
+ "mul.s %[temp], %[s1], %[wi] \n\t" |
|
116 |
+ "mul.s %[temp1], %[s1], %[wj] \n\t" |
|
117 |
+ "mul.s %[temp2], %[s11], %[wi1] \n\t" |
|
118 |
+ "mul.s %[temp3], %[s11], %[wj1] \n\t" |
|
119 |
+ |
|
120 |
+ "msub.s %[temp], %[temp], %[s0], %[wj] \n\t" |
|
121 |
+ "madd.s %[temp1], %[temp1], %[s0], %[wi] \n\t" |
|
122 |
+ "msub.s %[temp2], %[temp2], %[s01], %[wj1] \n\t" |
|
123 |
+ "madd.s %[temp3], %[temp3], %[s01], %[wi1] \n\t" |
|
124 |
+ |
|
125 |
+ "swc1 %[temp], 0(%[dst_i2]) \n\t" /* dst[i] = s0*wj - s1*wi; */ |
|
126 |
+ "swc1 %[temp1], 0(%[dst_j2]) \n\t" /* dst[j] = s0*wi + s1*wj; */ |
|
127 |
+ "swc1 %[temp2], 4(%[dst_i2]) \n\t" /* dst[i+1] = s01*wj1 - s11*wi1; */ |
|
128 |
+ "swc1 %[temp3], -4(%[dst_j2]) \n\t" /* dst[j-1] = s01*wi1 + s11*wj1; */ |
|
129 |
+ |
|
130 |
+ "mul.s %[temp4], %[s12], %[wi2] \n\t" |
|
131 |
+ "mul.s %[temp5], %[s12], %[wj2] \n\t" |
|
132 |
+ "mul.s %[temp6], %[s13], %[wi3] \n\t" |
|
133 |
+ "mul.s %[temp7], %[s13], %[wj3] \n\t" |
|
134 |
+ |
|
135 |
+ "msub.s %[temp4], %[temp4], %[s02], %[wj2] \n\t" |
|
136 |
+ "madd.s %[temp5], %[temp5], %[s02], %[wi2] \n\t" |
|
137 |
+ "msub.s %[temp6], %[temp6], %[s03], %[wj3] \n\t" |
|
138 |
+ "madd.s %[temp7], %[temp7], %[s03], %[wi3] \n\t" |
|
139 |
+ |
|
140 |
+ "swc1 %[temp4], 8(%[dst_i2]) \n\t" /* dst[i+2] = s02*wj2 - s12*wi2; */ |
|
141 |
+ "swc1 %[temp5], -8(%[dst_j2]) \n\t" /* dst[j-2] = s02*wi2 + s12*wj2; */ |
|
142 |
+ "swc1 %[temp6], 12(%[dst_i2]) \n\t" /* dst[i+2] = s03*wj3 - s13*wi3; */ |
|
143 |
+ "swc1 %[temp7], -12(%[dst_j2]) \n\t" /* dst[j-3] = s03*wi3 + s13*wj3; */ |
|
144 |
+ : [temp]"=&f"(temp), |
|
145 |
+ [temp1]"=&f"(temp1), [temp2]"=&f"(temp2), [temp3]"=&f"(temp3), |
|
146 |
+ [temp4]"=&f"(temp4), [temp5]"=&f"(temp5), [temp6]"=&f"(temp6), |
|
147 |
+ [temp7] "=&f" (temp7) |
|
148 |
+ : [dst_j2]"r"(dst_j2), [dst_i2]"r"(dst_i2), |
|
149 |
+ [s0] "f"(src0[i + 4]), [wj] "f"(win[j - 4]), [s1] "f"(src1[j - 4]), |
|
150 |
+ [wi] "f"(win[i + 4]), [s01]"f"(src0[i + 5]),[wj1]"f"(win[j - 5]), |
|
151 |
+ [s11]"f"(src1[j - 5]), [wi1]"f"(win[i + 5]), [s02]"f"(src0[i + 6]), |
|
152 |
+ [wj2]"f"(win[j - 6]), [s12]"f"(src1[j - 6]),[wi2]"f"(win[i + 6]), |
|
153 |
+ [s03]"f"(src0[i + 7]), [wj3]"f"(win[j - 7]), [s13]"f"(src1[j - 7]), |
|
154 |
+ [wi3]"f"(win[i + 7]) |
|
155 |
+ : "memory" |
|
156 |
+ ); |
|
157 |
+ } |
|
158 |
+} |
|
159 |
+ |
|
160 |
+av_cold void ff_dsputil_init_mips( DSPContext* c, AVCodecContext *avctx ) |
|
161 |
+{ |
|
162 |
+ c->vector_fmul_window = vector_fmul_window_mips; |
|
163 |
+} |
0 | 164 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,67 @@ |
0 |
+/* |
|
1 |
+ * Copyright (c) 2012 |
|
2 |
+ * MIPS Technologies, Inc., California. |
|
3 |
+ * |
|
4 |
+ * Redistribution and use in source and binary forms, with or without |
|
5 |
+ * modification, are permitted provided that the following conditions |
|
6 |
+ * are met: |
|
7 |
+ * 1. Redistributions of source code must retain the above copyright |
|
8 |
+ * notice, this list of conditions and the following disclaimer. |
|
9 |
+ * 2. Redistributions in binary form must reproduce the above copyright |
|
10 |
+ * notice, this list of conditions and the following disclaimer in the |
|
11 |
+ * documentation and/or other materials provided with the distribution. |
|
12 |
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its |
|
13 |
+ * contributors may be used to endorse or promote products derived from |
|
14 |
+ * this software without specific prior written permission. |
|
15 |
+ * |
|
16 |
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND |
|
17 |
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
18 |
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
19 |
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE |
|
20 |
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|
21 |
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
|
22 |
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
|
23 |
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
|
24 |
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
|
25 |
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
26 |
+ * SUCH DAMAGE. |
|
27 |
+ * |
|
28 |
+ * Author: Stanislav Ocovaj (socovaj@mips.com) |
|
29 |
+ * |
|
30 |
+ * This file is part of FFmpeg. |
|
31 |
+ * |
|
32 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
33 |
+ * modify it under the terms of the GNU Lesser General Public |
|
34 |
+ * License as published by the Free Software Foundation; either |
|
35 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
36 |
+ * |
|
37 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
38 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
39 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
40 |
+ * Lesser General Public License for more details. |
|
41 |
+ * |
|
42 |
+ * You should have received a copy of the GNU Lesser General Public |
|
43 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
44 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
45 |
+ */ |
|
46 |
+ |
|
47 |
+/** |
|
48 |
+ * @file |
|
49 |
+ * definitions and initialization of LUT table for MIPS FFT |
|
50 |
+ */ |
|
51 |
+#include "fft_table.h" |
|
52 |
+ |
|
53 |
+uint16_t fft_offsets_lut[0x2aab]; |
|
54 |
+ |
|
55 |
+void ff_fft_lut_init(uint16_t *table, int off, int size, int *index) |
|
56 |
+{ |
|
57 |
+ if (size < 16) { |
|
58 |
+ table[*index] = off >> 2; |
|
59 |
+ (*index)++; |
|
60 |
+ } |
|
61 |
+ else { |
|
62 |
+ ff_fft_lut_init(table, off, size>>1, index); |
|
63 |
+ ff_fft_lut_init(table, off+(size>>1), size>>2, index); |
|
64 |
+ ff_fft_lut_init(table, off+3*(size>>2), size>>2, index); |
|
65 |
+ } |
|
66 |
+} |
0 | 67 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,530 @@ |
0 |
+/* |
|
1 |
+ * Copyright (c) 2012 |
|
2 |
+ * MIPS Technologies, Inc., California. |
|
3 |
+ * |
|
4 |
+ * Redistribution and use in source and binary forms, with or without |
|
5 |
+ * modification, are permitted provided that the following conditions |
|
6 |
+ * are met: |
|
7 |
+ * 1. Redistributions of source code must retain the above copyright |
|
8 |
+ * notice, this list of conditions and the following disclaimer. |
|
9 |
+ * 2. Redistributions in binary form must reproduce the above copyright |
|
10 |
+ * notice, this list of conditions and the following disclaimer in the |
|
11 |
+ * documentation and/or other materials provided with the distribution. |
|
12 |
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its |
|
13 |
+ * contributors may be used to endorse or promote products derived from |
|
14 |
+ * this software without specific prior written permission. |
|
15 |
+ * |
|
16 |
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND |
|
17 |
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
18 |
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
19 |
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE |
|
20 |
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|
21 |
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
|
22 |
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
|
23 |
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
|
24 |
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
|
25 |
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
26 |
+ * SUCH DAMAGE. |
|
27 |
+ * |
|
28 |
+ * Author: Stanislav Ocovaj (socovaj@mips.com) |
|
29 |
+ * Author: Zoran Lukic (zoranl@mips.com) |
|
30 |
+ * |
|
31 |
+ * Optimized MDCT/IMDCT and FFT transforms |
|
32 |
+ * |
|
33 |
+ * This file is part of FFmpeg. |
|
34 |
+ * |
|
35 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
36 |
+ * modify it under the terms of the GNU Lesser General Public |
|
37 |
+ * License as published by the Free Software Foundation; either |
|
38 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
39 |
+ * |
|
40 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
41 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
42 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
43 |
+ * Lesser General Public License for more details. |
|
44 |
+ * |
|
45 |
+ * You should have received a copy of the GNU Lesser General Public |
|
46 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
47 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
48 |
+ */ |
|
49 |
+#include "config.h" |
|
50 |
+#include "libavcodec/fft.h" |
|
51 |
+#include "fft_table.h" |
|
52 |
+ |
|
53 |
+/** |
|
54 |
+ * FFT transform |
|
55 |
+ */ |
|
56 |
+ |
|
57 |
+#if HAVE_INLINE_ASM |
|
58 |
+static void ff_fft_calc_mips(FFTContext *s, FFTComplex *z) |
|
59 |
+{ |
|
60 |
+ int nbits, i, n, num_transforms, offset, step; |
|
61 |
+ int n4, n2, n34; |
|
62 |
+ FFTSample tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; |
|
63 |
+ FFTComplex *tmpz; |
|
64 |
+ float w_re, w_im; |
|
65 |
+ float *w_re_ptr, *w_im_ptr; |
|
66 |
+ const int fft_size = (1 << s->nbits); |
|
67 |
+ int s_n = s->nbits; |
|
68 |
+ int tem1, tem2; |
|
69 |
+ float pom, pom1, pom2, pom3; |
|
70 |
+ float temp, temp1, temp3, temp4; |
|
71 |
+ FFTComplex * tmpz_n2, * tmpz_n34, * tmpz_n4; |
|
72 |
+ FFTComplex * tmpz_n2_i, * tmpz_n34_i, * tmpz_n4_i, * tmpz_i; |
|
73 |
+ |
|
74 |
+ /** |
|
75 |
+ *num_transforms = (0x2aab >> (16 - s->nbits)) | 1; |
|
76 |
+ */ |
|
77 |
+ __asm__ volatile ( |
|
78 |
+ "li %[tem1], 16 \n\t" |
|
79 |
+ "sub %[s_n], %[tem1], %[s_n] \n\t" |
|
80 |
+ "li %[tem2], 10923 \n\t" |
|
81 |
+ "srav %[tem2], %[tem2], %[s_n] \n\t" |
|
82 |
+ "ori %[num_t],%[tem2], 1 \n\t" |
|
83 |
+ : [num_t]"=r"(num_transforms), [s_n]"+r"(s_n), |
|
84 |
+ [tem1]"=&r"(tem1), [tem2]"=&r"(tem2) |
|
85 |
+ ); |
|
86 |
+ |
|
87 |
+ |
|
88 |
+ for (n=0; n<num_transforms; n++) { |
|
89 |
+ offset = fft_offsets_lut[n] << 2; |
|
90 |
+ tmpz = z + offset; |
|
91 |
+ |
|
92 |
+ tmp1 = tmpz[0].re + tmpz[1].re; |
|
93 |
+ tmp5 = tmpz[2].re + tmpz[3].re; |
|
94 |
+ tmp2 = tmpz[0].im + tmpz[1].im; |
|
95 |
+ tmp6 = tmpz[2].im + tmpz[3].im; |
|
96 |
+ tmp3 = tmpz[0].re - tmpz[1].re; |
|
97 |
+ tmp8 = tmpz[2].im - tmpz[3].im; |
|
98 |
+ tmp4 = tmpz[0].im - tmpz[1].im; |
|
99 |
+ tmp7 = tmpz[2].re - tmpz[3].re; |
|
100 |
+ |
|
101 |
+ tmpz[0].re = tmp1 + tmp5; |
|
102 |
+ tmpz[2].re = tmp1 - tmp5; |
|
103 |
+ tmpz[0].im = tmp2 + tmp6; |
|
104 |
+ tmpz[2].im = tmp2 - tmp6; |
|
105 |
+ tmpz[1].re = tmp3 + tmp8; |
|
106 |
+ tmpz[3].re = tmp3 - tmp8; |
|
107 |
+ tmpz[1].im = tmp4 - tmp7; |
|
108 |
+ tmpz[3].im = tmp4 + tmp7; |
|
109 |
+ |
|
110 |
+ } |
|
111 |
+ |
|
112 |
+ if (fft_size < 8) |
|
113 |
+ return; |
|
114 |
+ |
|
115 |
+ num_transforms = (num_transforms >> 1) | 1; |
|
116 |
+ |
|
117 |
+ for (n=0; n<num_transforms; n++) { |
|
118 |
+ offset = fft_offsets_lut[n] << 3; |
|
119 |
+ tmpz = z + offset; |
|
120 |
+ |
|
121 |
+ __asm__ volatile ( |
|
122 |
+ "lwc1 %[tmp1], 32(%[tmpz]) \n\t" |
|
123 |
+ "lwc1 %[pom], 40(%[tmpz]) \n\t" |
|
124 |
+ "lwc1 %[tmp3], 48(%[tmpz]) \n\t" |
|
125 |
+ "lwc1 %[pom1], 56(%[tmpz]) \n\t" |
|
126 |
+ "lwc1 %[tmp2], 36(%[tmpz]) \n\t" |
|
127 |
+ "lwc1 %[pom2], 44(%[tmpz]) \n\t" |
|
128 |
+ "lwc1 %[pom3], 60(%[tmpz]) \n\t" |
|
129 |
+ "lwc1 %[tmp4], 52(%[tmpz]) \n\t" |
|
130 |
+ "add.s %[tmp1], %[tmp1], %[pom] \n\t" // tmp1 = tmpz[4].re + tmpz[5].re; |
|
131 |
+ "add.s %[tmp3], %[tmp3], %[pom1] \n\t" // tmp3 = tmpz[6].re + tmpz[7].re; |
|
132 |
+ "add.s %[tmp2], %[tmp2], %[pom2] \n\t" // tmp2 = tmpz[4].im + tmpz[5].im; |
|
133 |
+ "lwc1 %[pom], 40(%[tmpz]) \n\t" |
|
134 |
+ "add.s %[tmp4], %[tmp4], %[pom3] \n\t" // tmp4 = tmpz[6].im + tmpz[7].im; |
|
135 |
+ "add.s %[tmp5], %[tmp1], %[tmp3] \n\t" // tmp5 = tmp1 + tmp3; |
|
136 |
+ "sub.s %[tmp7], %[tmp1], %[tmp3] \n\t" // tmp7 = tmp1 - tmp3; |
|
137 |
+ "lwc1 %[tmp1], 32(%[tmpz]) \n\t" |
|
138 |
+ "lwc1 %[pom1], 44(%[tmpz]) \n\t" |
|
139 |
+ "add.s %[tmp6], %[tmp2], %[tmp4] \n\t" // tmp6 = tmp2 + tmp4; |
|
140 |
+ "sub.s %[tmp8], %[tmp2], %[tmp4] \n\t" // tmp8 = tmp2 - tmp4; |
|
141 |
+ "lwc1 %[tmp2], 36(%[tmpz]) \n\t" |
|
142 |
+ "lwc1 %[pom2], 56(%[tmpz]) \n\t" |
|
143 |
+ "lwc1 %[pom3], 60(%[tmpz]) \n\t" |
|
144 |
+ "lwc1 %[tmp3], 48(%[tmpz]) \n\t" |
|
145 |
+ "lwc1 %[tmp4], 52(%[tmpz]) \n\t" |
|
146 |
+ "sub.s %[tmp1], %[tmp1], %[pom] \n\t" // tmp1 = tmpz[4].re - tmpz[5].re; |
|
147 |
+ "lwc1 %[pom], 0(%[tmpz]) \n\t" |
|
148 |
+ "sub.s %[tmp2], %[tmp2], %[pom1] \n\t" // tmp2 = tmpz[4].im - tmpz[5].im; |
|
149 |
+ "sub.s %[tmp3], %[tmp3], %[pom2] \n\t" // tmp3 = tmpz[6].re - tmpz[7].re; |
|
150 |
+ "lwc1 %[pom2], 4(%[tmpz]) \n\t" |
|
151 |
+ "sub.s %[pom1], %[pom], %[tmp5] \n\t" |
|
152 |
+ "sub.s %[tmp4], %[tmp4], %[pom3] \n\t" // tmp4 = tmpz[6].im - tmpz[7].im; |
|
153 |
+ "add.s %[pom3], %[pom], %[tmp5] \n\t" |
|
154 |
+ "sub.s %[pom], %[pom2], %[tmp6] \n\t" |
|
155 |
+ "add.s %[pom2], %[pom2], %[tmp6] \n\t" |
|
156 |
+ "swc1 %[pom1], 32(%[tmpz]) \n\t" // tmpz[4].re = tmpz[0].re - tmp5; |
|
157 |
+ "swc1 %[pom3], 0(%[tmpz]) \n\t" // tmpz[0].re = tmpz[0].re + tmp5; |
|
158 |
+ "swc1 %[pom], 36(%[tmpz]) \n\t" // tmpz[4].im = tmpz[0].im - tmp6; |
|
159 |
+ "swc1 %[pom2], 4(%[tmpz]) \n\t" // tmpz[0].im = tmpz[0].im + tmp6; |
|
160 |
+ "lwc1 %[pom1], 16(%[tmpz]) \n\t" |
|
161 |
+ "lwc1 %[pom3], 20(%[tmpz]) \n\t" |
|
162 |
+ "li.s %[pom], 0.7071067812 \n\t" // float pom = 0.7071067812f; |
|
163 |
+ "add.s %[temp1],%[tmp1], %[tmp2] \n\t" |
|
164 |
+ "sub.s %[temp], %[pom1], %[tmp8] \n\t" |
|
165 |
+ "add.s %[pom2], %[pom3], %[tmp7] \n\t" |
|
166 |
+ "sub.s %[temp3],%[tmp3], %[tmp4] \n\t" |
|
167 |
+ "sub.s %[temp4],%[tmp2], %[tmp1] \n\t" |
|
168 |
+ "swc1 %[temp], 48(%[tmpz]) \n\t" // tmpz[6].re = tmpz[2].re - tmp8; |
|
169 |
+ "swc1 %[pom2], 52(%[tmpz]) \n\t" // tmpz[6].im = tmpz[2].im + tmp7; |
|
170 |
+ "add.s %[pom1], %[pom1], %[tmp8] \n\t" |
|
171 |
+ "sub.s %[pom3], %[pom3], %[tmp7] \n\t" |
|
172 |
+ "add.s %[tmp3], %[tmp3], %[tmp4] \n\t" |
|
173 |
+ "mul.s %[tmp5], %[pom], %[temp1] \n\t" // tmp5 = pom * (tmp1 + tmp2); |
|
174 |
+ "mul.s %[tmp7], %[pom], %[temp3] \n\t" // tmp7 = pom * (tmp3 - tmp4); |
|
175 |
+ "mul.s %[tmp6], %[pom], %[temp4] \n\t" // tmp6 = pom * (tmp2 - tmp1); |
|
176 |
+ "mul.s %[tmp8], %[pom], %[tmp3] \n\t" // tmp8 = pom * (tmp3 + tmp4); |
|
177 |
+ "swc1 %[pom1], 16(%[tmpz]) \n\t" // tmpz[2].re = tmpz[2].re + tmp8; |
|
178 |
+ "swc1 %[pom3], 20(%[tmpz]) \n\t" // tmpz[2].im = tmpz[2].im - tmp7; |
|
179 |
+ "add.s %[tmp1], %[tmp5], %[tmp7] \n\t" // tmp1 = tmp5 + tmp7; |
|
180 |
+ "sub.s %[tmp3], %[tmp5], %[tmp7] \n\t" // tmp3 = tmp5 - tmp7; |
|
181 |
+ "add.s %[tmp2], %[tmp6], %[tmp8] \n\t" // tmp2 = tmp6 + tmp8; |
|
182 |
+ "sub.s %[tmp4], %[tmp6], %[tmp8] \n\t" // tmp4 = tmp6 - tmp8; |
|
183 |
+ "lwc1 %[temp], 8(%[tmpz]) \n\t" |
|
184 |
+ "lwc1 %[temp1],12(%[tmpz]) \n\t" |
|
185 |
+ "lwc1 %[pom], 24(%[tmpz]) \n\t" |
|
186 |
+ "lwc1 %[pom2], 28(%[tmpz]) \n\t" |
|
187 |
+ "sub.s %[temp4],%[temp], %[tmp1] \n\t" |
|
188 |
+ "sub.s %[temp3],%[temp1], %[tmp2] \n\t" |
|
189 |
+ "add.s %[temp], %[temp], %[tmp1] \n\t" |
|
190 |
+ "add.s %[temp1],%[temp1], %[tmp2] \n\t" |
|
191 |
+ "sub.s %[pom1], %[pom], %[tmp4] \n\t" |
|
192 |
+ "add.s %[pom3], %[pom2], %[tmp3] \n\t" |
|
193 |
+ "add.s %[pom], %[pom], %[tmp4] \n\t" |
|
194 |
+ "sub.s %[pom2], %[pom2], %[tmp3] \n\t" |
|
195 |
+ "swc1 %[temp4],40(%[tmpz]) \n\t" // tmpz[5].re = tmpz[1].re - tmp1; |
|
196 |
+ "swc1 %[temp3],44(%[tmpz]) \n\t" // tmpz[5].im = tmpz[1].im - tmp2; |
|
197 |
+ "swc1 %[temp], 8(%[tmpz]) \n\t" // tmpz[1].re = tmpz[1].re + tmp1; |
|
198 |
+ "swc1 %[temp1],12(%[tmpz]) \n\t" // tmpz[1].im = tmpz[1].im + tmp2; |
|
199 |
+ "swc1 %[pom1], 56(%[tmpz]) \n\t" // tmpz[7].re = tmpz[3].re - tmp4; |
|
200 |
+ "swc1 %[pom3], 60(%[tmpz]) \n\t" // tmpz[7].im = tmpz[3].im + tmp3; |
|
201 |
+ "swc1 %[pom], 24(%[tmpz]) \n\t" // tmpz[3].re = tmpz[3].re + tmp4; |
|
202 |
+ "swc1 %[pom2], 28(%[tmpz]) \n\t" // tmpz[3].im = tmpz[3].im - tmp3; |
|
203 |
+ : [tmp1]"=&f"(tmp1), [pom]"=&f"(pom), [pom1]"=&f"(pom1), [pom2]"=&f"(pom2), |
|
204 |
+ [tmp3]"=&f"(tmp3), [tmp2]"=&f"(tmp2), [tmp4]"=&f"(tmp4), [tmp5]"=&f"(tmp5), [tmp7]"=&f"(tmp7), |
|
205 |
+ [tmp6]"=&f"(tmp6), [tmp8]"=&f"(tmp8), [pom3]"=&f"(pom3),[temp]"=&f"(temp), [temp1]"=&f"(temp1), |
|
206 |
+ [temp3]"=&f"(temp3), [temp4]"=&f"(temp4) |
|
207 |
+ : [tmpz]"r"(tmpz) |
|
208 |
+ : "memory" |
|
209 |
+ ); |
|
210 |
+ } |
|
211 |
+ |
|
212 |
+ step = 1 << (MAX_LOG2_NFFT - 4); |
|
213 |
+ n4 = 4; |
|
214 |
+ |
|
215 |
+ for (nbits=4; nbits<=s->nbits; nbits++) { |
|
216 |
+ /* |
|
217 |
+ * num_transforms = (num_transforms >> 1) | 1; |
|
218 |
+ */ |
|
219 |
+ __asm__ volatile ( |
|
220 |
+ "sra %[num_t], %[num_t], 1 \n\t" |
|
221 |
+ "ori %[num_t], %[num_t], 1 \n\t" |
|
222 |
+ |
|
223 |
+ : [num_t] "+r" (num_transforms) |
|
224 |
+ ); |
|
225 |
+ n2 = 2 * n4; |
|
226 |
+ n34 = 3 * n4; |
|
227 |
+ |
|
228 |
+ for (n=0; n<num_transforms; n++) { |
|
229 |
+ offset = fft_offsets_lut[n] << nbits; |
|
230 |
+ tmpz = z + offset; |
|
231 |
+ |
|
232 |
+ tmpz_n2 = tmpz + n2; |
|
233 |
+ tmpz_n4 = tmpz + n4; |
|
234 |
+ tmpz_n34 = tmpz + n34; |
|
235 |
+ |
|
236 |
+ __asm__ volatile ( |
|
237 |
+ "lwc1 %[pom1], 0(%[tmpz_n2]) \n\t" |
|
238 |
+ "lwc1 %[pom], 0(%[tmpz_n34]) \n\t" |
|
239 |
+ "lwc1 %[pom2], 4(%[tmpz_n2]) \n\t" |
|
240 |
+ "lwc1 %[pom3], 4(%[tmpz_n34]) \n\t" |
|
241 |
+ "lwc1 %[temp1],0(%[tmpz]) \n\t" |
|
242 |
+ "lwc1 %[temp3],4(%[tmpz]) \n\t" |
|
243 |
+ "add.s %[tmp5], %[pom1], %[pom] \n\t" // tmp5 = tmpz[ n2].re + tmpz[n34].re; |
|
244 |
+ "sub.s %[tmp1], %[pom1], %[pom] \n\t" // tmp1 = tmpz[ n2].re - tmpz[n34].re; |
|
245 |
+ "add.s %[tmp6], %[pom2], %[pom3] \n\t" // tmp6 = tmpz[ n2].im + tmpz[n34].im; |
|
246 |
+ "sub.s %[tmp2], %[pom2], %[pom3] \n\t" // tmp2 = tmpz[ n2].im - tmpz[n34].im; |
|
247 |
+ "sub.s %[temp], %[temp1], %[tmp5] \n\t" |
|
248 |
+ "add.s %[temp1],%[temp1], %[tmp5] \n\t" |
|
249 |
+ "sub.s %[temp4],%[temp3], %[tmp6] \n\t" |
|
250 |
+ "add.s %[temp3],%[temp3], %[tmp6] \n\t" |
|
251 |
+ "swc1 %[temp], 0(%[tmpz_n2]) \n\t" // tmpz[ n2].re = tmpz[ 0].re - tmp5; |
|
252 |
+ "swc1 %[temp1],0(%[tmpz]) \n\t" // tmpz[ 0].re = tmpz[ 0].re + tmp5; |
|
253 |
+ "lwc1 %[pom1], 0(%[tmpz_n4]) \n\t" |
|
254 |
+ "swc1 %[temp4],4(%[tmpz_n2]) \n\t" // tmpz[ n2].im = tmpz[ 0].im - tmp6; |
|
255 |
+ "lwc1 %[temp], 4(%[tmpz_n4]) \n\t" |
|
256 |
+ "swc1 %[temp3],4(%[tmpz]) \n\t" // tmpz[ 0].im = tmpz[ 0].im + tmp6; |
|
257 |
+ "sub.s %[pom], %[pom1], %[tmp2] \n\t" |
|
258 |
+ "add.s %[pom1], %[pom1], %[tmp2] \n\t" |
|
259 |
+ "add.s %[temp1],%[temp], %[tmp1] \n\t" |
|
260 |
+ "sub.s %[temp], %[temp], %[tmp1] \n\t" |
|
261 |
+ "swc1 %[pom], 0(%[tmpz_n34]) \n\t" // tmpz[n34].re = tmpz[n4].re - tmp2; |
|
262 |
+ "swc1 %[pom1], 0(%[tmpz_n4]) \n\t" // tmpz[ n4].re = tmpz[n4].re + tmp2; |
|
263 |
+ "swc1 %[temp1],4(%[tmpz_n34]) \n\t" // tmpz[n34].im = tmpz[n4].im + tmp1; |
|
264 |
+ "swc1 %[temp], 4(%[tmpz_n4]) \n\t" // tmpz[ n4].im = tmpz[n4].im - tmp1; |
|
265 |
+ : [tmp5]"=&f"(tmp5), |
|
266 |
+ [tmp1]"=&f"(tmp1), [pom]"=&f"(pom), [pom1]"=&f"(pom1), [pom2]"=&f"(pom2), |
|
267 |
+ [tmp2]"=&f"(tmp2), [tmp6]"=&f"(tmp6), [pom3]"=&f"(pom3), |
|
268 |
+ [temp]"=&f"(temp), [temp1]"=&f"(temp1), [temp3]"=&f"(temp3), [temp4]"=&f"(temp4) |
|
269 |
+ : [tmpz]"r"(tmpz), [tmpz_n2]"r"(tmpz_n2), [tmpz_n34]"r"(tmpz_n34), [tmpz_n4]"r"(tmpz_n4) |
|
270 |
+ : "memory" |
|
271 |
+ ); |
|
272 |
+ |
|
273 |
+ w_re_ptr = (float*)(ff_cos_65536 + step); |
|
274 |
+ w_im_ptr = (float*)(ff_cos_65536 + MAX_FFT_SIZE/4 - step); |
|
275 |
+ |
|
276 |
+ for (i=1; i<n4; i++) { |
|
277 |
+ w_re = w_re_ptr[0]; |
|
278 |
+ w_im = w_im_ptr[0]; |
|
279 |
+ tmpz_n2_i = tmpz_n2 + i; |
|
280 |
+ tmpz_n4_i = tmpz_n4 + i; |
|
281 |
+ tmpz_n34_i= tmpz_n34 + i; |
|
282 |
+ tmpz_i = tmpz + i; |
|
283 |
+ |
|
284 |
+ __asm__ volatile ( |
|
285 |
+ "lwc1 %[temp], 0(%[tmpz_n2_i]) \n\t" |
|
286 |
+ "lwc1 %[temp1], 4(%[tmpz_n2_i]) \n\t" |
|
287 |
+ "lwc1 %[pom], 0(%[tmpz_n34_i]) \n\t" |
|
288 |
+ "lwc1 %[pom1], 4(%[tmpz_n34_i]) \n\t" |
|
289 |
+ "mul.s %[temp3], %[w_im], %[temp] \n\t" |
|
290 |
+ "mul.s %[temp4], %[w_im], %[temp1] \n\t" |
|
291 |
+ "mul.s %[pom2], %[w_im], %[pom1] \n\t" |
|
292 |
+ "mul.s %[pom3], %[w_im], %[pom] \n\t" |
|
293 |
+ "msub.s %[tmp2], %[temp3], %[w_re], %[temp1] \n\t" // tmp2 = w_re * tmpz[ n2+i].im - w_im * tmpz[ n2+i].re; |
|
294 |
+ "madd.s %[tmp1], %[temp4], %[w_re], %[temp] \n\t" // tmp1 = w_re * tmpz[ n2+i].re + w_im * tmpz[ n2+i].im; |
|
295 |
+ "msub.s %[tmp3], %[pom2], %[w_re], %[pom] \n\t" // tmp3 = w_re * tmpz[n34+i].re - w_im * tmpz[n34+i].im; |
|
296 |
+ "madd.s %[tmp4], %[pom3], %[w_re], %[pom1] \n\t" // tmp4 = w_re * tmpz[n34+i].im + w_im * tmpz[n34+i].re; |
|
297 |
+ "lwc1 %[temp], 0(%[tmpz_i]) \n\t" |
|
298 |
+ "lwc1 %[pom], 4(%[tmpz_i]) \n\t" |
|
299 |
+ "add.s %[tmp5], %[tmp1], %[tmp3] \n\t" // tmp5 = tmp1 + tmp3; |
|
300 |
+ "sub.s %[tmp1], %[tmp1], %[tmp3] \n\t" // tmp1 = tmp1 - tmp3; |
|
301 |
+ "add.s %[tmp6], %[tmp2], %[tmp4] \n\t" // tmp6 = tmp2 + tmp4; |
|
302 |
+ "sub.s %[tmp2], %[tmp2], %[tmp4] \n\t" // tmp2 = tmp2 - tmp4; |
|
303 |
+ "sub.s %[temp1], %[temp], %[tmp5] \n\t" |
|
304 |
+ "add.s %[temp], %[temp], %[tmp5] \n\t" |
|
305 |
+ "sub.s %[pom1], %[pom], %[tmp6] \n\t" |
|
306 |
+ "add.s %[pom], %[pom], %[tmp6] \n\t" |
|
307 |
+ "lwc1 %[temp3], 0(%[tmpz_n4_i]) \n\t" |
|
308 |
+ "lwc1 %[pom2], 4(%[tmpz_n4_i]) \n\t" |
|
309 |
+ "swc1 %[temp1], 0(%[tmpz_n2_i]) \n\t" // tmpz[ n2+i].re = tmpz[ i].re - tmp5; |
|
310 |
+ "swc1 %[temp], 0(%[tmpz_i]) \n\t" // tmpz[ i].re = tmpz[ i].re + tmp5; |
|
311 |
+ "swc1 %[pom1], 4(%[tmpz_n2_i]) \n\t" // tmpz[ n2+i].im = tmpz[ i].im - tmp6; |
|
312 |
+ "swc1 %[pom] , 4(%[tmpz_i]) \n\t" // tmpz[ i].im = tmpz[ i].im + tmp6; |
|
313 |
+ "sub.s %[temp4], %[temp3], %[tmp2] \n\t" |
|
314 |
+ "add.s %[pom3], %[pom2], %[tmp1] \n\t" |
|
315 |
+ "add.s %[temp3], %[temp3], %[tmp2] \n\t" |
|
316 |
+ "sub.s %[pom2], %[pom2], %[tmp1] \n\t" |
|
317 |
+ "swc1 %[temp4], 0(%[tmpz_n34_i]) \n\t" // tmpz[n34+i].re = tmpz[n4+i].re - tmp2; |
|
318 |
+ "swc1 %[pom3], 4(%[tmpz_n34_i]) \n\t" // tmpz[n34+i].im = tmpz[n4+i].im + tmp1; |
|
319 |
+ "swc1 %[temp3], 0(%[tmpz_n4_i]) \n\t" // tmpz[ n4+i].re = tmpz[n4+i].re + tmp2; |
|
320 |
+ "swc1 %[pom2], 4(%[tmpz_n4_i]) \n\t" // tmpz[ n4+i].im = tmpz[n4+i].im - tmp1; |
|
321 |
+ : [tmp1]"=&f"(tmp1), [tmp2]"=&f" (tmp2), [temp]"=&f"(temp), [tmp3]"=&f"(tmp3), |
|
322 |
+ [tmp4]"=&f"(tmp4), [tmp5]"=&f"(tmp5), [tmp6]"=&f"(tmp6), |
|
323 |
+ [temp1]"=&f"(temp1), [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), |
|
324 |
+ [pom]"=&f"(pom), [pom1]"=&f"(pom1), [pom2]"=&f"(pom2), [pom3]"=&f"(pom3) |
|
325 |
+ : [w_re]"f"(w_re), [w_im]"f"(w_im), |
|
326 |
+ [tmpz_i]"r"(tmpz_i),[tmpz_n2_i]"r"(tmpz_n2_i), |
|
327 |
+ [tmpz_n34_i]"r"(tmpz_n34_i), [tmpz_n4_i]"r"(tmpz_n4_i) |
|
328 |
+ : "memory" |
|
329 |
+ ); |
|
330 |
+ w_re_ptr += step; |
|
331 |
+ w_im_ptr -= step; |
|
332 |
+ } |
|
333 |
+ } |
|
334 |
+ step >>= 1; |
|
335 |
+ n4 <<= 1; |
|
336 |
+ } |
|
337 |
+} |
|
338 |
+ |
|
339 |
/**
 * MDCT/IMDCT transforms.
 */

/**
 * Compute the first half of an inverse MDCT (MIPS FPU inline assembly).
 *
 * Follows the generic ff_imdct_half() structure: pre-rotation of the input
 * with the tcos/tsin twiddle tables (scattered into z through the
 * bit-reversal table revtab), an in-place FFT via s->fft_calc, then
 * post-rotation with symmetric reordering.  Each asm block handles two
 * complex samples per iteration (loops are 2x unrolled).
 *
 * @param s      FFT/MDCT context (uses mdct_bits, revtab, tcos, tsin, fft_calc)
 * @param output n/2 output samples, aliased as n/4 FFTComplex values
 * @param input  n/2 input samples
 *
 * NOTE(review): the 2x unrolling assumes n4 (pre-rotation) and n8
 * (post-rotation) are even — TODO confirm the minimum supported mdct_bits.
 * NOTE(review): neither asm block declares a "memory" clobber; correctness
 * relies on all loads/stores of z going through C statements (and on the
 * fft_calc call acting as a barrier between the two loops) — verify if the
 * asm is ever changed to store directly.
 */
static void ff_imdct_half_mips(FFTContext *s, FFTSample *output, const FFTSample *input)
{
    int k, n8, n4, n2, n, j;
    const uint16_t *revtab = s->revtab;
    const FFTSample *tcos = s->tcos;
    const FFTSample *tsin = s->tsin;
    const FFTSample *in1, *in2, *in3, *in4;
    FFTComplex *z = (FFTComplex *)output;

    int j1;
    const float *tcos1, *tsin1, *tcos2, *tsin2;
    float temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8,
        temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16;
    FFTComplex *z1, *z2;

    n = 1 << s->mdct_bits;
    n2 = n >> 1;
    n4 = n >> 2;
    n8 = n >> 3;

    /* pre rotation */
    /* Walk the input from both ends: in1/in3 forward, in2/in4 backward. */
    in1 = input;
    in2 = input + n2 - 1;
    in3 = input + 2;
    in4 = input + n2 - 3;

    tcos1 = tcos;
    tsin1 = tsin;

    /* n4 = 64 or 128 */
    for(k = 0; k < n4; k += 2) {
        j  = revtab[k    ];
        j1 = revtab[k + 1];

        /* Two complex multiplies: (in2,in1)*(tcos1,tsin1) -> temp9/temp10,
         * (in4,in3)*(tcos1[1],tsin1[1]) -> temp11/temp12; pointers are
         * advanced inside the asm ("+r" operands). */
        __asm__ volatile (
            "lwc1           %[temp1],       0(%[in2])                           \t\n"
            "lwc1           %[temp2],       0(%[tcos1])                         \t\n"
            "lwc1           %[temp3],       0(%[tsin1])                         \t\n"
            "lwc1           %[temp4],       0(%[in1])                           \t\n"
            "lwc1           %[temp5],       0(%[in4])                           \t\n"
            "mul.s          %[temp9],       %[temp1],       %[temp2]            \t\n"
            "mul.s          %[temp10],      %[temp1],       %[temp3]            \t\n"
            "lwc1           %[temp6],       4(%[tcos1])                         \t\n"
            "lwc1           %[temp7],       4(%[tsin1])                         \t\n"
            "nmsub.s        %[temp9],       %[temp9],       %[temp4],   %[temp3] \t\n"
            "madd.s         %[temp10],      %[temp10],      %[temp4],   %[temp2] \t\n"
            "mul.s          %[temp11],      %[temp5],       %[temp6]            \t\n"
            "mul.s          %[temp12],      %[temp5],       %[temp7]            \t\n"
            "lwc1           %[temp8],       0(%[in3])                           \t\n"
            "addiu          %[tcos1],       %[tcos1],       8                   \t\n"
            "addiu          %[tsin1],       %[tsin1],       8                   \t\n"
            "addiu          %[in1],         %[in1],         16                  \t\n"
            "nmsub.s        %[temp11],      %[temp11],      %[temp8],   %[temp7] \t\n"
            "madd.s         %[temp12],      %[temp12],      %[temp8],   %[temp6] \t\n"
            "addiu          %[in2],         %[in2],         -16                 \t\n"
            "addiu          %[in3],         %[in3],         16                  \t\n"
            "addiu          %[in4],         %[in4],         -16                 \t\n"

            : [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
              [temp3]"=&f"(temp3), [temp4]"=&f"(temp4),
              [temp5]"=&f"(temp5), [temp6]"=&f"(temp6),
              [temp7]"=&f"(temp7), [temp8]"=&f"(temp8),
              [temp9]"=&f"(temp9), [temp10]"=&f"(temp10),
              [temp11]"=&f"(temp11), [temp12]"=&f"(temp12),
              [tsin1]"+r"(tsin1), [tcos1]"+r"(tcos1),
              [in1]"+r"(in1), [in2]"+r"(in2),
              [in3]"+r"(in3), [in4]"+r"(in4)
        );

        /* Bit-reversed scatter of the rotated pair. */
        z[j ].re = temp9;
        z[j ].im = temp10;
        z[j1].re = temp11;
        z[j1].im = temp12;
    }

    s->fft_calc(s, z);

    /* post rotation + reordering */
    /* n8 = 32 or 64 */
    /* Process two symmetric complex pairs per iteration: z1 walks down
     * from the middle, z2 walks up; each pair is rotated by the matching
     * tcos/tsin entries. */
    for(k = 0; k < n8; k += 2) {
        tcos1 = &tcos[n8 - k - 2];
        tsin1 = &tsin[n8 - k - 2];
        tcos2 = &tcos[n8 + k];
        tsin2 = &tsin[n8 + k];
        z1 = &z[n8 - k - 2];
        z2 = &z[n8 + k    ];

        __asm__ volatile (
            "lwc1       %[temp1],   12(%[z1])                           \t\n"
            "lwc1       %[temp2],   4(%[tsin1])                         \t\n"
            "lwc1       %[temp3],   4(%[tcos1])                         \t\n"
            "lwc1       %[temp4],   8(%[z1])                            \t\n"
            "lwc1       %[temp5],   4(%[z1])                            \t\n"
            "mul.s      %[temp9],   %[temp1],   %[temp2]                \t\n"
            "mul.s      %[temp10],  %[temp1],   %[temp3]                \t\n"
            "lwc1       %[temp6],   0(%[tsin1])                         \t\n"
            "lwc1       %[temp7],   0(%[tcos1])                         \t\n"
            "nmsub.s    %[temp9],   %[temp9],   %[temp4],   %[temp3]    \t\n"
            "madd.s     %[temp10],  %[temp10],  %[temp4],   %[temp2]    \t\n"
            "mul.s      %[temp11],  %[temp5],   %[temp6]                \t\n"
            "mul.s      %[temp12],  %[temp5],   %[temp7]                \t\n"
            "lwc1       %[temp8],   0(%[z1])                            \t\n"
            "lwc1       %[temp1],   4(%[z2])                            \t\n"
            "lwc1       %[temp2],   0(%[tsin2])                         \t\n"
            "lwc1       %[temp3],   0(%[tcos2])                         \t\n"
            "nmsub.s    %[temp11],  %[temp11],  %[temp8],   %[temp7]    \t\n"
            "madd.s     %[temp12],  %[temp12],  %[temp8],   %[temp6]    \t\n"
            "mul.s      %[temp13],  %[temp1],   %[temp2]                \t\n"
            "mul.s      %[temp14],  %[temp1],   %[temp3]                \t\n"
            "lwc1       %[temp4],   0(%[z2])                            \t\n"
            "lwc1       %[temp5],   12(%[z2])                           \t\n"
            "lwc1       %[temp6],   4(%[tsin2])                         \t\n"
            "lwc1       %[temp7],   4(%[tcos2])                         \t\n"
            "nmsub.s    %[temp13],  %[temp13],  %[temp4],   %[temp3]    \t\n"
            "madd.s     %[temp14],  %[temp14],  %[temp4],   %[temp2]    \t\n"
            "mul.s      %[temp15],  %[temp5],   %[temp6]                \t\n"
            "mul.s      %[temp16],  %[temp5],   %[temp7]                \t\n"
            "lwc1       %[temp8],   8(%[z2])                            \t\n"
            "nmsub.s    %[temp15],  %[temp15],  %[temp8],   %[temp7]    \t\n"
            "madd.s     %[temp16],  %[temp16],  %[temp8],   %[temp6]    \t\n"
            : [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
              [temp3]"=&f"(temp3), [temp4]"=&f"(temp4),
              [temp5]"=&f"(temp5), [temp6]"=&f"(temp6),
              [temp7]"=&f"(temp7), [temp8]"=&f"(temp8),
              [temp9]"=&f"(temp9), [temp10]"=&f"(temp10),
              [temp11]"=&f"(temp11), [temp12]"=&f"(temp12),
              [temp13]"=&f"(temp13), [temp14]"=&f"(temp14),
              [temp15]"=&f"(temp15), [temp16]"=&f"(temp16)
            : [z1]"r"(z1), [z2]"r"(z2),
              [tsin1]"r"(tsin1), [tcos1]"r"(tcos1),
              [tsin2]"r"(tsin2), [tcos2]"r"(tcos2)
        );

        /* Cross-write the rotated values: real parts and imaginary parts
         * swap between the mirrored z1/z2 positions. */
        z1[1].re = temp9;
        z1[1].im = temp14;
        z2[0].re = temp13;
        z2[0].im = temp10;

        z1[0].re = temp11;
        z1[0].im = temp16;
        z2[1].re = temp15;
        z2[1].im = temp12;
    }
}
|
487 |
+#endif /* HAVE_INLINE_ASM */ |
|
488 |
+ |
|
489 |
+/** |
|
490 |
+ * Compute inverse MDCT of size N = 2^nbits |
|
491 |
+ * @param output N samples |
|
492 |
+ * @param input N/2 samples |
|
493 |
+ */ |
|
494 |
+static void ff_imdct_calc_mips(FFTContext *s, FFTSample *output, const FFTSample *input) |
|
495 |
+{ |
|
496 |
+ int k; |
|
497 |
+ int n = 1 << s->mdct_bits; |
|
498 |
+ int n2 = n >> 1; |
|
499 |
+ int n4 = n >> 2; |
|
500 |
+ |
|
501 |
+ ff_imdct_half_mips(s, output+n4, input); |
|
502 |
+ |
|
503 |
+ for(k = 0; k < n4; k+=4) { |
|
504 |
+ output[k] = -output[n2-k-1]; |
|
505 |
+ output[k+1] = -output[n2-k-2]; |
|
506 |
+ output[k+2] = -output[n2-k-3]; |
|
507 |
+ output[k+3] = -output[n2-k-4]; |
|
508 |
+ |
|
509 |
+ output[n-k-1] = output[n2+k]; |
|
510 |
+ output[n-k-2] = output[n2+k+1]; |
|
511 |
+ output[n-k-3] = output[n2+k+2]; |
|
512 |
+ output[n-k-4] = output[n2+k+3]; |
|
513 |
+ } |
|
514 |
+} |
|
515 |
+ |
|
516 |
/**
 * Install MIPS-optimized FFT/MDCT function pointers and precompute the
 * offsets LUT for transforms up to 2^16 points.
 */
av_cold void ff_fft_init_mips(FFTContext *s)
{
    int n=0;

    /* Fill fft_offsets_lut once for the maximum supported size (1 << 16). */
    ff_fft_lut_init(fft_offsets_lut, 0, 1 << 16, &n);

#if HAVE_INLINE_ASM
    s->fft_calc = ff_fft_calc_mips;
#endif
#if CONFIG_MDCT
    s->imdct_calc = ff_imdct_calc_mips;
    s->imdct_half = ff_imdct_half_mips;
#endif
}
0 | 530 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,63 @@ |
0 |
+/* |
|
1 |
+ * Copyright (c) 2012 |
|
2 |
+ * MIPS Technologies, Inc., California. |
|
3 |
+ * |
|
4 |
+ * Redistribution and use in source and binary forms, with or without |
|
5 |
+ * modification, are permitted provided that the following conditions |
|
6 |
+ * are met: |
|
7 |
+ * 1. Redistributions of source code must retain the above copyright |
|
8 |
+ * notice, this list of conditions and the following disclaimer. |
|
9 |
+ * 2. Redistributions in binary form must reproduce the above copyright |
|
10 |
+ * notice, this list of conditions and the following disclaimer in the |
|
11 |
+ * documentation and/or other materials provided with the distribution. |
|
12 |
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its |
|
13 |
+ * contributors may be used to endorse or promote products derived from |
|
14 |
+ * this software without specific prior written permission. |
|
15 |
+ * |
|
16 |
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND |
|
17 |
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
18 |
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
19 |
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE |
|
20 |
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|
21 |
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
|
22 |
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
|
23 |
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
|
24 |
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
|
25 |
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
26 |
+ * SUCH DAMAGE. |
|
27 |
+ * |
|
28 |
+ * Author: Stanislav Ocovaj (socovaj@mips.com) |
|
29 |
+ * |
|
30 |
+ * This file is part of FFmpeg. |
|
31 |
+ * |
|
32 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
33 |
+ * modify it under the terms of the GNU Lesser General Public |
|
34 |
+ * License as published by the Free Software Foundation; either |
|
35 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
36 |
+ * |
|
37 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
38 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
39 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
40 |
+ * Lesser General Public License for more details. |
|
41 |
+ * |
|
42 |
+ * You should have received a copy of the GNU Lesser General Public |
|
43 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
44 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
45 |
+ */ |
|
46 |
+ |
|
47 |
+/** |
|
48 |
+ * @file |
|
49 |
+ * definitions and LUT table for MIPS FFT |
|
50 |
+ */ |
|
51 |
#ifndef AVCODEC_MIPS_FFT_TABLE_H
#define AVCODEC_MIPS_FFT_TABLE_H

#include "libavcodec/fft.h"

#define MAX_LOG2_NFFT 16 //!< Specifies maximum allowed fft size
#define MAX_FFT_SIZE (1 << MAX_LOG2_NFFT)

/** Offset lookup table shared by the iterative MIPS FFT implementation. */
extern uint16_t fft_offsets_lut[];

/**
 * Initialize the FFT offsets lookup table.
 * @param table destination LUT
 * @param off   offset of the current sub-transform
 * @param size  size (number of points) of the current sub-transform
 * @param index running write position into @p table, updated on return
 */
void ff_fft_lut_init(uint16_t *table, int off, int size, int *index);

#endif /* AVCODEC_MIPS_FFT_TABLE_H */
0 | 63 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,338 @@ |
0 |
+/* |
|
1 |
+ * Format Conversion Utils for MIPS |
|
2 |
+ * |
|
3 |
+ * Copyright (c) 2012 |
|
4 |
+ * MIPS Technologies, Inc., California. |
|
5 |
+ * |
|
6 |
+ * Redistribution and use in source and binary forms, with or without |
|
7 |
+ * modification, are permitted provided that the following conditions |
|
8 |
+ * are met: |
|
9 |
+ * 1. Redistributions of source code must retain the above copyright |
|
10 |
+ * notice, this list of conditions and the following disclaimer. |
|
11 |
+ * 2. Redistributions in binary form must reproduce the above copyright |
|
12 |
+ * notice, this list of conditions and the following disclaimer in the |
|
13 |
+ * documentation and/or other materials provided with the distribution. |
|
14 |
 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
|
15 |
+ * contributors may be used to endorse or promote products derived from |
|
16 |
+ * this software without specific prior written permission. |
|
17 |
+ * |
|
18 |
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND |
|
19 |
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
20 |
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
21 |
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE |
|
22 |
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|
23 |
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
|
24 |
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
|
25 |
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
|
26 |
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
|
27 |
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
28 |
+ * SUCH DAMAGE. |
|
29 |
+ * |
|
30 |
+ * Author: Zoran Lukic (zoranl@mips.com) |
|
31 |
+ * Author: Nedeljko Babic (nbabic@mips.com) |
|
32 |
+ * |
|
33 |
+ * This file is part of FFmpeg. |
|
34 |
+ * |
|
35 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
36 |
+ * modify it under the terms of the GNU Lesser General Public |
|
37 |
+ * License as published by the Free Software Foundation; either |
|
38 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
39 |
+ * |
|
40 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
41 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
42 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
43 |
+ * Lesser General Public License for more details. |
|
44 |
+ * |
|
45 |
+ * You should have received a copy of the GNU Lesser General Public |
|
46 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
47 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
48 |
+ */ |
|
49 |
+#include "config.h" |
|
50 |
+#include "libavcodec/avcodec.h" |
|
51 |
+#include "libavcodec/fmtconvert.h" |
|
52 |
+ |
|
53 |
+#if HAVE_MIPSDSPR1 |
|
54 |
/**
 * Convert an array of floats to saturated int16 (MIPS DSPr1).
 *
 * Each float is converted with cvt.w.s (rounding per the current FPU
 * rounding mode) and clamped to the int16 range by the DSPr1 saturating
 * shift pair shll_s.w/srl (shift left saturating to 32 bits, then shift
 * back down 16 — net effect: saturate to [-32768, 32767]).
 *
 * @param dst destination int16 buffer, len elements
 * @param src source float buffer, len elements
 * @param len number of samples; assumes len is a multiple of 8 (the loop
 *            advances src by 8 floats per iteration and only compares
 *            src == src_end) — TODO confirm all callers guarantee this
 */
static void float_to_int16_mips(int16_t *dst, const float *src, long len)
{
    const float *src_end = src + len;
    int ret0, ret1, ret2, ret3, ret4, ret5, ret6, ret7;
    float src0, src1, src2, src3, src4, src5, src6, src7;

    /*
     * loop is 8 times unrolled in assembler in order to achieve better performance
     */
    __asm__ volatile(
        "beq        %[len],  $zero, fti16_end%=     \n\t"   /* skip everything when len == 0 */
        "fti16_lp%=:                                \n\t"
        "lwc1       %[src0], 0(%[src])              \n\t"
        "lwc1       %[src1], 4(%[src])              \n\t"
        "lwc1       %[src2], 8(%[src])              \n\t"
        "lwc1       %[src3], 12(%[src])             \n\t"
        "cvt.w.s    %[src0], %[src0]                \n\t"
        "cvt.w.s    %[src1], %[src1]                \n\t"
        "cvt.w.s    %[src2], %[src2]                \n\t"
        "cvt.w.s    %[src3], %[src3]                \n\t"
        "mfc1       %[ret0], %[src0]                \n\t"
        "mfc1       %[ret1], %[src1]                \n\t"
        "mfc1       %[ret2], %[src2]                \n\t"
        "mfc1       %[ret3], %[src3]                \n\t"
        "lwc1       %[src4], 16(%[src])             \n\t"
        "lwc1       %[src5], 20(%[src])             \n\t"
        "lwc1       %[src6], 24(%[src])             \n\t"
        "lwc1       %[src7], 28(%[src])             \n\t"
        "cvt.w.s    %[src4], %[src4]                \n\t"
        "cvt.w.s    %[src5], %[src5]                \n\t"
        "cvt.w.s    %[src6], %[src6]                \n\t"
        "cvt.w.s    %[src7], %[src7]                \n\t"
        "addiu      %[src],  32                     \n\t"
        "shll_s.w   %[ret0], %[ret0], 16            \n\t"
        "shll_s.w   %[ret1], %[ret1], 16            \n\t"
        "shll_s.w   %[ret2], %[ret2], 16            \n\t"
        "shll_s.w   %[ret3], %[ret3], 16            \n\t"
        "srl        %[ret0], %[ret0], 16            \n\t"
        "srl        %[ret1], %[ret1], 16            \n\t"
        "srl        %[ret2], %[ret2], 16            \n\t"
        "srl        %[ret3], %[ret3], 16            \n\t"
        "sh         %[ret0], 0(%[dst])              \n\t"
        "sh         %[ret1], 2(%[dst])              \n\t"
        "sh         %[ret2], 4(%[dst])              \n\t"
        "sh         %[ret3], 6(%[dst])              \n\t"
        "mfc1       %[ret4], %[src4]                \n\t"
        "mfc1       %[ret5], %[src5]                \n\t"
        "mfc1       %[ret6], %[src6]                \n\t"
        "mfc1       %[ret7], %[src7]                \n\t"
        "shll_s.w   %[ret4], %[ret4], 16            \n\t"
        "shll_s.w   %[ret5], %[ret5], 16            \n\t"
        "shll_s.w   %[ret6], %[ret6], 16            \n\t"
        "shll_s.w   %[ret7], %[ret7], 16            \n\t"
        "srl        %[ret4], %[ret4], 16            \n\t"
        "srl        %[ret5], %[ret5], 16            \n\t"
        "srl        %[ret6], %[ret6], 16            \n\t"
        "srl        %[ret7], %[ret7], 16            \n\t"
        "sh         %[ret4], 8(%[dst])              \n\t"
        "sh         %[ret5], 10(%[dst])             \n\t"
        "sh         %[ret6], 12(%[dst])             \n\t"
        "sh         %[ret7], 14(%[dst])             \n\t"
        "addiu      %[dst],  16                     \n\t"
        "bne        %[src],  %[src_end], fti16_lp%= \n\t"
        "fti16_end%=:                               \n\t"
        : [ret0]"=&r"(ret0), [ret1]"=&r"(ret1), [ret2]"=&r"(ret2), [ret3]"=&r"(ret3),
          [ret4]"=&r"(ret4), [ret5]"=&r"(ret5), [ret6]"=&r"(ret6), [ret7]"=&r"(ret7),
          [src0]"=&f"(src0), [src1]"=&f"(src1), [src2]"=&f"(src2), [src3]"=&f"(src3),
          [src4]"=&f"(src4), [src5]"=&f"(src5), [src6]"=&f"(src6), [src7]"=&f"(src7),
          [src]"+r"(src), [dst]"+r"(dst)
        : [src_end]"r"(src_end), [len]"r"(len)
        : "memory"
    );
}
|
127 |
+ |
|
128 |
/**
 * Convert per-channel float buffers to interleaved saturated int16
 * (MIPS DSPr1).
 *
 * Same conversion as float_to_int16_mips (cvt.w.s + saturating
 * shll_s.w/srl pair); the stereo case interleaves two source buffers one
 * sample at a time, the generic case writes each channel into dst with a
 * stride of 'channels' int16s (ch2 bytes), 8 samples per asm iteration.
 *
 * Fixes versus the original:
 *  - the stereo asm used hard-coded $f9/$f10 without declaring them in the
 *    clobber list, so the compiler was free to keep live values there;
 *    they are now listed as clobbers;
 *  - both loops were bottom-tested with no entry guard, so len == 0 would
 *    run past the buffers; a skip branch was added, matching
 *    float_to_int16_mips.
 *
 * @param dst      interleaved destination, len * channels elements
 * @param src      array of 'channels' pointers to len floats each
 * @param len      samples per channel; the generic (channels != 2) path
 *                 assumes len is a multiple of 8 — TODO confirm callers
 * @param channels channel count
 */
static void float_to_int16_interleave_mips(int16_t *dst, const float **src, long len,
        int channels)
{
    int c, ch2 = channels <<1;          /* dst stride in bytes between successive samples of one channel */
    int ret0, ret1, ret2, ret3, ret4, ret5, ret6, ret7;
    float src0, src1, src2, src3, src4, src5, src6, src7;
    int16_t *dst_ptr0, *dst_ptr1, *dst_ptr2, *dst_ptr3;
    int16_t *dst_ptr4, *dst_ptr5, *dst_ptr6, *dst_ptr7;
    const float *src_ptr, *src_ptr2, *src_end;

    if (channels == 2) {
        src_ptr  = &src[0][0];
        src_ptr2 = &src[1][0];
        src_end  = src_ptr + len;

        __asm__ volatile (
            "beq        %[src_ptr],  %[src_end], fti16i2_end%=  \n\t"   /* len == 0: nothing to do */
            "fti16i2_lp%=:                                      \n\t"
            "lwc1       %[src0], 0(%[src_ptr])                  \n\t"
            "lwc1       %[src1], 0(%[src_ptr2])                 \n\t"
            "addiu      %[src_ptr],  4                          \n\t"
            "cvt.w.s    $f9,     %[src0]                        \n\t"
            "cvt.w.s    $f10,    %[src1]                        \n\t"
            "mfc1       %[ret0], $f9                            \n\t"
            "mfc1       %[ret1], $f10                           \n\t"
            "shll_s.w   %[ret0], %[ret0], 16                    \n\t"
            "shll_s.w   %[ret1], %[ret1], 16                    \n\t"
            "addiu      %[src_ptr2], 4                          \n\t"
            "srl        %[ret0], %[ret0], 16                    \n\t"
            "srl        %[ret1], %[ret1], 16                    \n\t"
            "sh         %[ret0], 0(%[dst])                      \n\t"
            "sh         %[ret1], 2(%[dst])                      \n\t"
            "addiu      %[dst],  4                              \n\t"
            "bne        %[src_ptr], %[src_end], fti16i2_lp%=    \n\t"
            "fti16i2_end%=:                                     \n\t"
            : [ret0]"=&r"(ret0), [ret1]"=&r"(ret1),
              [src0]"=&f"(src0), [src1]"=&f"(src1),
              [src_ptr]"+r"(src_ptr), [src_ptr2]"+r"(src_ptr2),
              [dst]"+r"(dst)
            : [src_end]"r"(src_end)
            : "memory", "$f9", "$f10"   /* $f9/$f10 are used directly by the template */
        );
    } else {
        for (c = 0; c < channels; c++) {
            src_ptr  = &src[c][0];
            dst_ptr0 = &dst[c];
            src_end  = src_ptr + len;
            /*
             * loop is 8 times unrolled in assembler in order to achieve better performance
             */
            __asm__ volatile(
                "beq        %[src_ptr], %[src_end], fti16i_end%=    \n\t"   /* len == 0: nothing to do */
                "fti16i_lp%=:                                       \n\t"
                "lwc1       %[src0], 0(%[src_ptr])                  \n\t"
                "lwc1       %[src1], 4(%[src_ptr])                  \n\t"
                "lwc1       %[src2], 8(%[src_ptr])                  \n\t"
                "lwc1       %[src3], 12(%[src_ptr])                 \n\t"
                "cvt.w.s    %[src0], %[src0]                        \n\t"
                "cvt.w.s    %[src1], %[src1]                        \n\t"
                "cvt.w.s    %[src2], %[src2]                        \n\t"
                "cvt.w.s    %[src3], %[src3]                        \n\t"
                "mfc1       %[ret0], %[src0]                        \n\t"
                "mfc1       %[ret1], %[src1]                        \n\t"
                "mfc1       %[ret2], %[src2]                        \n\t"
                "mfc1       %[ret3], %[src3]                        \n\t"
                "lwc1       %[src4], 16(%[src_ptr])                 \n\t"
                "lwc1       %[src5], 20(%[src_ptr])                 \n\t"
                "lwc1       %[src6], 24(%[src_ptr])                 \n\t"
                "lwc1       %[src7], 28(%[src_ptr])                 \n\t"
                "addu       %[dst_ptr1], %[dst_ptr0], %[ch2]        \n\t"
                "addu       %[dst_ptr2], %[dst_ptr1], %[ch2]        \n\t"
                "addu       %[dst_ptr3], %[dst_ptr2], %[ch2]        \n\t"
                "addu       %[dst_ptr4], %[dst_ptr3], %[ch2]        \n\t"
                "addu       %[dst_ptr5], %[dst_ptr4], %[ch2]        \n\t"
                "addu       %[dst_ptr6], %[dst_ptr5], %[ch2]        \n\t"
                "addu       %[dst_ptr7], %[dst_ptr6], %[ch2]        \n\t"
                "addiu      %[src_ptr], 32                          \n\t"
                "cvt.w.s    %[src4], %[src4]                        \n\t"
                "cvt.w.s    %[src5], %[src5]                        \n\t"
                "cvt.w.s    %[src6], %[src6]                        \n\t"
                "cvt.w.s    %[src7], %[src7]                        \n\t"
                "shll_s.w   %[ret0], %[ret0], 16                    \n\t"
                "shll_s.w   %[ret1], %[ret1], 16                    \n\t"
                "shll_s.w   %[ret2], %[ret2], 16                    \n\t"
                "shll_s.w   %[ret3], %[ret3], 16                    \n\t"
                "srl        %[ret0], %[ret0], 16                    \n\t"
                "srl        %[ret1], %[ret1], 16                    \n\t"
                "srl        %[ret2], %[ret2], 16                    \n\t"
                "srl        %[ret3], %[ret3], 16                    \n\t"
                "sh         %[ret0], 0(%[dst_ptr0])                 \n\t"
                "sh         %[ret1], 0(%[dst_ptr1])                 \n\t"
                "sh         %[ret2], 0(%[dst_ptr2])                 \n\t"
                "sh         %[ret3], 0(%[dst_ptr3])                 \n\t"
                "mfc1       %[ret4], %[src4]                        \n\t"
                "mfc1       %[ret5], %[src5]                        \n\t"
                "mfc1       %[ret6], %[src6]                        \n\t"
                "mfc1       %[ret7], %[src7]                        \n\t"
                "shll_s.w   %[ret4], %[ret4], 16                    \n\t"
                "shll_s.w   %[ret5], %[ret5], 16                    \n\t"
                "shll_s.w   %[ret6], %[ret6], 16                    \n\t"
                "shll_s.w   %[ret7], %[ret7], 16                    \n\t"
                "srl        %[ret4], %[ret4], 16                    \n\t"
                "srl        %[ret5], %[ret5], 16                    \n\t"
                "srl        %[ret6], %[ret6], 16                    \n\t"
                "srl        %[ret7], %[ret7], 16                    \n\t"
                "sh         %[ret4], 0(%[dst_ptr4])                 \n\t"
                "sh         %[ret5], 0(%[dst_ptr5])                 \n\t"
                "sh         %[ret6], 0(%[dst_ptr6])                 \n\t"
                "sh         %[ret7], 0(%[dst_ptr7])                 \n\t"
                "addu       %[dst_ptr0], %[dst_ptr7], %[ch2]        \n\t"
                "bne        %[src_ptr], %[src_end], fti16i_lp%=     \n\t"
                "fti16i_end%=:                                      \n\t"
                : [ret0]"=&r"(ret0), [ret1]"=&r"(ret1), [ret2]"=&r"(ret2), [ret3]"=&r"(ret3),
                  [ret4]"=&r"(ret4), [ret5]"=&r"(ret5), [ret6]"=&r"(ret6), [ret7]"=&r"(ret7),
                  [src0]"=&f"(src0), [src1]"=&f"(src1), [src2]"=&f"(src2), [src3]"=&f"(src3),
                  [src4]"=&f"(src4), [src5]"=&f"(src5), [src6]"=&f"(src6), [src7]"=&f"(src7),
                  [dst_ptr1]"=&r"(dst_ptr1), [dst_ptr2]"=&r"(dst_ptr2), [dst_ptr3]"=&r"(dst_ptr3),
                  [dst_ptr4]"=&r"(dst_ptr4), [dst_ptr5]"=&r"(dst_ptr5), [dst_ptr6]"=&r"(dst_ptr6),
                  [dst_ptr7]"=&r"(dst_ptr7), [dst_ptr0]"+r"(dst_ptr0), [src_ptr]"+r"(src_ptr)
                : [ch2]"r"(ch2), [src_end]"r"(src_end)
                : "memory"
            );
        }
    }
}
|
249 |
+#endif /* HAVE_MIPSDSPR1 */ |
|
250 |
+ |
|
251 |
/**
 * dst[i] = (float)src[i] * mul, for i in [0, len) (MIPS FPU).
 *
 * The int32 values are moved into FPU registers, converted with cvt.s.w
 * and scaled; the loop processes 8 elements per iteration.
 *
 * Fix versus the original: the loop was bottom-tested with no entry guard,
 * so len == 0 made the first iteration read/write past the buffers and the
 * exit comparison could never match; a skip branch was added, matching
 * float_to_int16_mips.
 *
 * @param dst destination float buffer, len elements
 * @param src source int32 buffer, len elements
 * @param mul scale factor applied to every sample
 * @param len number of elements; assumes len is a multiple of 8 — TODO
 *            confirm all callers guarantee this
 */
static void int32_to_float_fmul_scalar_mips(float *dst, const int *src,
        float mul, int len)
{
    /*
     * variables used in inline assembler
     */
    float temp1, temp3, temp5, temp7, temp9, temp11, temp13, temp15;

    int rpom1, rpom2, rpom11, rpom21, rpom12, rpom22, rpom13, rpom23;
    const int *src_end = src + len;
    /*
     * loop is 8 times unrolled in assembler in order to achieve better performance
     */
    __asm__ volatile (
        "beq    %[src],     %[src_end], i32tf_end%=     \n\t"   /* len == 0: nothing to do */
        "i32tf_lp%=:                                    \n\t"
        "lw     %[rpom11],  0(%[src])                   \n\t"
        "lw     %[rpom21],  4(%[src])                   \n\t"
        "lw     %[rpom1],   8(%[src])                   \n\t"
        "lw     %[rpom2],   12(%[src])                  \n\t"
        "mtc1   %[rpom11],  %[temp1]                    \n\t"
        "mtc1   %[rpom21],  %[temp3]                    \n\t"
        "mtc1   %[rpom1],   %[temp5]                    \n\t"
        "mtc1   %[rpom2],   %[temp7]                    \n\t"

        "lw     %[rpom13],  16(%[src])                  \n\t"
        "lw     %[rpom23],  20(%[src])                  \n\t"
        "lw     %[rpom12],  24(%[src])                  \n\t"
        "lw     %[rpom22],  28(%[src])                  \n\t"
        "mtc1   %[rpom13],  %[temp9]                    \n\t"
        "mtc1   %[rpom23],  %[temp11]                   \n\t"
        "mtc1   %[rpom12],  %[temp13]                   \n\t"
        "mtc1   %[rpom22],  %[temp15]                   \n\t"

        "addiu  %[src],     32                          \n\t"
        "cvt.s.w %[temp1],  %[temp1]                    \n\t"
        "cvt.s.w %[temp3],  %[temp3]                    \n\t"
        "cvt.s.w %[temp5],  %[temp5]                    \n\t"
        "cvt.s.w %[temp7],  %[temp7]                    \n\t"

        "cvt.s.w %[temp9],  %[temp9]                    \n\t"
        "cvt.s.w %[temp11], %[temp11]                   \n\t"
        "cvt.s.w %[temp13], %[temp13]                   \n\t"
        "cvt.s.w %[temp15], %[temp15]                   \n\t"

        "mul.s  %[temp1],   %[temp1],   %[mul]          \n\t"
        "mul.s  %[temp3],   %[temp3],   %[mul]          \n\t"
        "mul.s  %[temp5],   %[temp5],   %[mul]          \n\t"
        "mul.s  %[temp7],   %[temp7],   %[mul]          \n\t"

        "mul.s  %[temp9],   %[temp9],   %[mul]          \n\t"
        "mul.s  %[temp11],  %[temp11],  %[mul]          \n\t"
        "mul.s  %[temp13],  %[temp13],  %[mul]          \n\t"
        "mul.s  %[temp15],  %[temp15],  %[mul]          \n\t"

        "swc1   %[temp1],   0(%[dst])                   \n\t"   /*dst[i]   = src[i]   * mul;*/
        "swc1   %[temp3],   4(%[dst])                   \n\t"   /*dst[i+1] = src[i+1] * mul;*/
        "swc1   %[temp5],   8(%[dst])                   \n\t"   /*dst[i+2] = src[i+2] * mul;*/
        "swc1   %[temp7],   12(%[dst])                  \n\t"   /*dst[i+3] = src[i+3] * mul;*/

        "swc1   %[temp9],   16(%[dst])                  \n\t"   /*dst[i+4] = src[i+4] * mul;*/
        "swc1   %[temp11],  20(%[dst])                  \n\t"   /*dst[i+5] = src[i+5] * mul;*/
        "swc1   %[temp13],  24(%[dst])                  \n\t"   /*dst[i+6] = src[i+6] * mul;*/
        "swc1   %[temp15],  28(%[dst])                  \n\t"   /*dst[i+7] = src[i+7] * mul;*/
        "addiu  %[dst],     32                          \n\t"
        "bne    %[src],     %[src_end], i32tf_lp%=      \n\t"
        "i32tf_end%=:                                   \n\t"
        : [temp1]"=&f"(temp1), [temp11]"=&f"(temp11),
          [temp13]"=&f"(temp13), [temp15]"=&f"(temp15),
          [temp3]"=&f"(temp3), [temp5]"=&f"(temp5),
          [temp7]"=&f"(temp7), [temp9]"=&f"(temp9),
          [rpom1]"=&r"(rpom1), [rpom2]"=&r"(rpom2),
          [rpom11]"=&r"(rpom11), [rpom21]"=&r"(rpom21),
          [rpom12]"=&r"(rpom12), [rpom22]"=&r"(rpom22),
          [rpom13]"=&r"(rpom13), [rpom23]"=&r"(rpom23),
          [dst]"+r"(dst), [src]"+r"(src)
        : [mul]"f"(mul), [src_end]"r"(src_end)
        : "memory"
    );
}
|
329 |
+ |
|
330 |
/**
 * Install MIPS-optimized format-conversion callbacks.
 * The float_to_int16 variants require DSPr1 (they use the saturating
 * shll_s.w instruction); int32_to_float_fmul_scalar only needs the FPU.
 */
av_cold void ff_fmt_convert_init_mips(FmtConvertContext *c)
{
#if HAVE_MIPSDSPR1
    c->float_to_int16_interleave = float_to_int16_interleave_mips;
    c->float_to_int16 = float_to_int16_mips;
#endif
    c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_mips;
}
+} |