Signed-off-by: Bojan Zivkovic <bojan@mips.com>
Reviewed-by: Nedeljko Babic <Nedeljko.Babic@imgtec.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
... | ... |
@@ -311,6 +311,13 @@ av_cold void ff_iir_filter_free_coeffs(struct FFIIRFilterCoeffs *coeffs) |
311 | 311 |
av_free(coeffs); |
312 | 312 |
} |
313 | 313 |
|
314 |
+void ff_iir_filter_init(FFIIRFilterContext *f) { |
|
315 |
+ f->filter_flt = ff_iir_filter_flt; |
|
316 |
+ |
|
317 |
+ if (HAVE_MIPSFPU) |
|
318 |
+ ff_iir_filter_init_mips(f); |
|
319 |
+} |
|
320 |
+ |
|
314 | 321 |
#ifdef TEST |
315 | 322 |
#include <stdio.h> |
316 | 323 |
|
... | ... |
@@ -47,6 +47,29 @@ enum IIRFilterMode{ |
47 | 47 |
FF_FILTER_MODE_BANDSTOP, |
48 | 48 |
}; |
49 | 49 |
|
50 |
/**
 * Table of IIR filtering entry points.
 *
 * The function pointer is filled in by ff_iir_filter_init().
 */
typedef struct FFIIRFilterContext {
    /**
     * Perform IIR filtering on floating-point input samples.
     *
     * @param coeffs pointer to filter coefficients
     * @param state  pointer to filter state
     * @param size   input length
     * @param src    source samples
     * @param sstep  source stride
     * @param dst    filtered samples (destination may be the same as input)
     * @param dstep  destination stride
     */
    void (*filter_flt)(const struct FFIIRFilterCoeffs *coeffs,
                       struct FFIIRFilterState *state, int size,
                       const float *src, int sstep, float *dst, int dstep);
} FFIIRFilterContext;
|
66 |
+ |
|
67 |
+/** |
|
68 |
+ * Initialize FFIIRFilterContext |
|
69 |
+ */ |
|
70 |
+void ff_iir_filter_init(FFIIRFilterContext *f); |
|
71 |
+void ff_iir_filter_init_mips(FFIIRFilterContext *f); |
|
72 |
+ |
|
50 | 73 |
/** |
51 | 74 |
* Initialize filter coefficients. |
52 | 75 |
* |
21 | 22 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,204 @@ |
0 |
+/* |
|
1 |
+ * Copyright (c) 2012 |
|
2 |
+ * MIPS Technologies, Inc., California. |
|
3 |
+ * |
|
4 |
+ * Redistribution and use in source and binary forms, with or without |
|
5 |
+ * modification, are permitted provided that the following conditions |
|
6 |
+ * are met: |
|
7 |
+ * 1. Redistributions of source code must retain the above copyright |
|
8 |
+ * notice, this list of conditions and the following disclaimer. |
|
9 |
+ * 2. Redistributions in binary form must reproduce the above copyright |
|
10 |
+ * notice, this list of conditions and the following disclaimer in the |
|
11 |
+ * documentation and/or other materials provided with the distribution. |
|
12 |
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its |
|
13 |
+ * contributors may be used to endorse or promote products derived from |
|
14 |
+ * this software without specific prior written permission. |
|
15 |
+ * |
|
16 |
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND |
|
17 |
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
18 |
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
19 |
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE |
|
20 |
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|
21 |
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
|
22 |
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
|
23 |
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
|
24 |
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
|
25 |
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
26 |
+ * SUCH DAMAGE. |
|
27 |
+ * |
|
28 |
+ * Author: Bojan Zivkovic (bojan@mips.com) |
|
29 |
+ * |
|
30 |
+ * IIR filter optimized for MIPS floating-point architecture |
|
31 |
+ * |
|
32 |
+ * This file is part of FFmpeg. |
|
33 |
+ * |
|
34 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
35 |
+ * modify it under the terms of the GNU Lesser General Public |
|
36 |
+ * License as published by the Free Software Foundation; either |
|
37 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
38 |
+ * |
|
39 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
40 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
41 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
42 |
+ * Lesser General Public License for more details. |
|
43 |
+ * |
|
44 |
+ * You should have received a copy of the GNU Lesser General Public |
|
45 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
46 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
47 |
+ */ |
|
48 |
+ |
|
49 |
+ /** |
|
50 |
+ * @file |
|
51 |
+ * Reference: libavcodec/iirfilter.c |
|
52 |
+ */ |
|
53 |
+ |
|
54 |
+#include "libavcodec/iirfilter.h" |
|
55 |
+ |
|
56 |
+#if HAVE_INLINE_ASM |
|
57 |
/**
 * IIR filter coefficients as accessed by the MIPS filter routine in this file.
 *
 * NOTE(review): this looks like a local copy of a definition owned by the
 * reference implementation named in the file header; the two layouts
 * presumably have to stay identical -- confirm before changing either.
 */
typedef struct FFIIRFilterCoeffs {
    int   order; ///< filter order; selects the code path of the filter routine
    float gain;  ///< factor applied to every input sample
    int   *cx;   ///< integer coefficients used when forming the output sample
    float *cy;   ///< float coefficients applied to the stored state
} FFIIRFilterCoeffs;
|
63 |
+ |
|
64 |
/**
 * IIR filter state (history values carried from one call to the next).
 *
 * The array is declared with a single element, but the filter routine in this
 * file indexes it up to order - 1, so the object is presumably allocated with
 * extra trailing space by whoever creates it -- TODO confirm against the
 * allocator.
 */
typedef struct FFIIRFilterState {
    float x[1]; ///< history values; x[0] is the oldest in the filter routine
} FFIIRFilterState;
|
67 |
+ |
|
68 |
+static void ff_iir_filter_flt_mips(const struct FFIIRFilterCoeffs *c, |
|
69 |
+ struct FFIIRFilterState *s, int size, |
|
70 |
+ const float *src, int sstep, float *dst, int dstep) |
|
71 |
+{ |
|
72 |
+ if (c->order == 2) { |
|
73 |
+ int i; |
|
74 |
+ const float *src0 = src; |
|
75 |
+ float *dst0 = dst; |
|
76 |
+ for (i = 0; i < size; i++) { |
|
77 |
+ float in = *src0 * c->gain + s->x[0] * c->cy[0] + s->x[1] * c->cy[1]; |
|
78 |
+ *dst0 = s->x[0] + in + s->x[1] * c->cx[1]; |
|
79 |
+ s->x[0] = s->x[1]; |
|
80 |
+ s->x[1] = in; |
|
81 |
+ src0 += sstep; |
|
82 |
+ dst0 += dstep; |
|
83 |
+ } |
|
84 |
+ } else if (c->order == 4) { |
|
85 |
+ int i; |
|
86 |
+ const float *src0 = src; |
|
87 |
+ float *dst0 = dst; |
|
88 |
+ float four = 4.0; |
|
89 |
+ float six = 6.0; |
|
90 |
+ for (i = 0; i < size; i += 4) { |
|
91 |
+ float in1, in2, in3, in4; |
|
92 |
+ float res1, res2, res3, res4; |
|
93 |
+ float *x = s->x; |
|
94 |
+ float *cy = c->cy; |
|
95 |
+ float gain = c->gain; |
|
96 |
+ float src0_0 = src0[0 ]; |
|
97 |
+ float src0_1 = src0[sstep ]; |
|
98 |
+ float src0_2 = src0[2*sstep]; |
|
99 |
+ float src0_3 = src0[3*sstep]; |
|
100 |
+ |
|
101 |
+ __asm__ volatile ( |
|
102 |
+ "lwc1 $f0, 0(%[cy]) \n\t" |
|
103 |
+ "lwc1 $f4, 0(%[x]) \n\t" |
|
104 |
+ "lwc1 $f5, 4(%[x]) \n\t" |
|
105 |
+ "lwc1 $f6, 8(%[x]) \n\t" |
|
106 |
+ "lwc1 $f7, 12(%[x]) \n\t" |
|
107 |
+ "mul.s %[in1], %[src0_0], %[gain] \n\t" |
|
108 |
+ "mul.s %[in2], %[src0_1], %[gain] \n\t" |
|
109 |
+ "mul.s %[in3], %[src0_2], %[gain] \n\t" |
|
110 |
+ "mul.s %[in4], %[src0_3], %[gain] \n\t" |
|
111 |
+ "lwc1 $f1, 4(%[cy]) \n\t" |
|
112 |
+ "madd.s %[in1], %[in1], $f0, $f4 \n\t" |
|
113 |
+ "madd.s %[in2], %[in2], $f0, $f5 \n\t" |
|
114 |
+ "madd.s %[in3], %[in3], $f0, $f6 \n\t" |
|
115 |
+ "madd.s %[in4], %[in4], $f0, $f7 \n\t" |
|
116 |
+ "lwc1 $f2, 8(%[cy]) \n\t" |
|
117 |
+ "madd.s %[in1], %[in1], $f1, $f5 \n\t" |
|
118 |
+ "madd.s %[in2], %[in2], $f1, $f6 \n\t" |
|
119 |
+ "madd.s %[in3], %[in3], $f1, $f7 \n\t" |
|
120 |
+ "lwc1 $f3, 12(%[cy]) \n\t" |
|
121 |
+ "add.s $f8, $f5, $f7 \n\t" |
|
122 |
+ "madd.s %[in1], %[in1], $f2, $f6 \n\t" |
|
123 |
+ "madd.s %[in2], %[in2], $f2, $f7 \n\t" |
|
124 |
+ "mul.s $f9, $f6, %[six] \n\t" |
|
125 |
+ "mul.s $f10, $f7, %[six] \n\t" |
|
126 |
+ "madd.s %[in1], %[in1], $f3, $f7 \n\t" |
|
127 |
+ "madd.s %[in2], %[in2], $f3, %[in1] \n\t" |
|
128 |
+ "madd.s %[in3], %[in3], $f2, %[in1] \n\t" |
|
129 |
+ "madd.s %[in4], %[in4], $f1, %[in1] \n\t" |
|
130 |
+ "add.s %[res1], $f4, %[in1] \n\t" |
|
131 |
+ "swc1 %[in1], 0(%[x]) \n\t" |
|
132 |
+ "add.s $f0, $f6, %[in1] \n\t" |
|
133 |
+ "madd.s %[in3], %[in3], $f3, %[in2] \n\t" |
|
134 |
+ "madd.s %[in4], %[in4], $f2, %[in2] \n\t" |
|
135 |
+ "add.s %[res2], $f5, %[in2] \n\t" |
|
136 |
+ "madd.s %[res1], %[res1], $f8, %[four] \n\t" |
|
137 |
+ "add.s $f8, $f7, %[in2] \n\t" |
|
138 |
+ "swc1 %[in2], 4(%[x]) \n\t" |
|
139 |
+ "madd.s %[in4], %[in4], $f3, %[in3] \n\t" |
|
140 |
+ "add.s %[res3], $f6, %[in3] \n\t" |
|
141 |
+ "add.s %[res1], %[res1], $f9 \n\t" |
|
142 |
+ "madd.s %[res2], %[res2], $f0, %[four] \n\t" |
|
143 |
+ "swc1 %[in3], 8(%[x]) \n\t" |
|
144 |
+ "add.s %[res4], $f7, %[in4] \n\t" |
|
145 |
+ "madd.s %[res3], %[res3], $f8, %[four] \n\t" |
|
146 |
+ "swc1 %[in4], 12(%[x]) \n\t" |
|
147 |
+ "add.s %[res2], %[res2], $f10 \n\t" |
|
148 |
+ "add.s $f8, %[in1], %[in3] \n\t" |
|
149 |
+ "madd.s %[res3], %[res3], %[in1], %[six] \n\t" |
|
150 |
+ "madd.s %[res4], %[res4], $f8, %[four] \n\t" |
|
151 |
+ "madd.s %[res4], %[res4], %[in2], %[six] \n\t" |
|
152 |
+ |
|
153 |
+ : [in1]"=&f"(in1), [in2]"=&f"(in2), |
|
154 |
+ [in3]"=&f"(in3), [in4]"=&f"(in4), |
|
155 |
+ [res1]"=&f"(res1), [res2]"=&f"(res2), |
|
156 |
+ [res3]"=&f"(res3), [res4]"=&f"(res4) |
|
157 |
+ : [src0_0]"f"(src0_0), [src0_1]"f"(src0_1), |
|
158 |
+ [src0_2]"f"(src0_2), [src0_3]"f"(src0_3), |
|
159 |
+ [gain]"f"(gain), [x]"r"(x), [cy]"r"(cy), |
|
160 |
+ [four]"f"(four), [six]"f"(six) |
|
161 |
+ : "$f0", "$f1", "$f2", "$f3", |
|
162 |
+ "$f4", "$f5", "$f6", "$f7", |
|
163 |
+ "$f8", "$f9", "$f10", |
|
164 |
+ "memory" |
|
165 |
+ ); |
|
166 |
+ |
|
167 |
+ dst0[0 ] = res1; |
|
168 |
+ dst0[sstep ] = res2; |
|
169 |
+ dst0[2*sstep] = res3; |
|
170 |
+ dst0[3*sstep] = res4; |
|
171 |
+ |
|
172 |
+ src0 += 4*sstep; |
|
173 |
+ dst0 += 4*dstep; |
|
174 |
+ } |
|
175 |
+ } else { |
|
176 |
+ int i; |
|
177 |
+ const float *src0 = src; |
|
178 |
+ float *dst0 = dst; |
|
179 |
+ for (i = 0; i < size; i++) { |
|
180 |
+ int j; |
|
181 |
+ float in, res; |
|
182 |
+ in = *src0 * c->gain; |
|
183 |
+ for(j = 0; j < c->order; j++) |
|
184 |
+ in += c->cy[j] * s->x[j]; |
|
185 |
+ res = s->x[0] + in + s->x[c->order >> 1] * c->cx[c->order >> 1]; |
|
186 |
+ for(j = 1; j < c->order >> 1; j++) |
|
187 |
+ res += (s->x[j] + s->x[c->order - j]) * c->cx[j]; |
|
188 |
+ for(j = 0; j < c->order - 1; j++) |
|
189 |
+ s->x[j] = s->x[j + 1]; |
|
190 |
+ *dst0 = res; |
|
191 |
+ s->x[c->order - 1] = in; |
|
192 |
+ src0 += sstep; |
|
193 |
+ dst0 += dstep; |
|
194 |
+ } |
|
195 |
+ } |
|
196 |
+} |
|
197 |
+#endif /* HAVE_INLINE_ASM */ |
|
198 |
+ |
|
199 |
+void ff_iir_filter_init_mips(FFIIRFilterContext *f) { |
|
200 |
+#if HAVE_INLINE_ASM |
|
201 |
+ f->filter_flt = ff_iir_filter_flt_mips; |
|
202 |
+#endif /* HAVE_INLINE_ASM */ |
|
203 |
+} |
... | ... |
@@ -88,6 +88,7 @@ typedef struct FFPsyPreprocessContext{ |
88 | 88 |
float stereo_att; |
89 | 89 |
struct FFIIRFilterCoeffs *fcoeffs; |
90 | 90 |
struct FFIIRFilterState **fstate; |
91 |
+ struct FFIIRFilterContext fiir; |
|
91 | 92 |
}FFPsyPreprocessContext; |
92 | 93 |
|
93 | 94 |
#define FILT_ORDER 4 |
... | ... |
@@ -115,6 +116,9 @@ av_cold struct FFPsyPreprocessContext* ff_psy_preprocess_init(AVCodecContext *av |
115 | 115 |
for (i = 0; i < avctx->channels; i++) |
116 | 116 |
ctx->fstate[i] = ff_iir_filter_init_state(FILT_ORDER); |
117 | 117 |
} |
118 |
+ |
|
119 |
+ ff_iir_filter_init(&ctx->fiir); |
|
120 |
+ |
|
118 | 121 |
return ctx; |
119 | 122 |
} |
120 | 123 |
|
... | ... |
@@ -122,11 +126,12 @@ void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, float **audio, int ch |
122 | 122 |
{ |
123 | 123 |
int ch; |
124 | 124 |
int frame_size = ctx->avctx->frame_size; |
125 |
+ FFIIRFilterContext *iir = &ctx->fiir; |
|
125 | 126 |
|
126 | 127 |
if (ctx->fstate) { |
127 | 128 |
for (ch = 0; ch < channels; ch++) |
128 |
- ff_iir_filter_flt(ctx->fcoeffs, ctx->fstate[ch], frame_size, |
|
129 |
- &audio[ch][frame_size], 1, &audio[ch][frame_size], 1); |
|
129 |
+ iir->filter_flt(ctx->fcoeffs, ctx->fstate[ch], frame_size, |
|
130 |
+ &audio[ch][frame_size], 1, &audio[ch][frame_size], 1); |
|
130 | 131 |
} |
131 | 132 |
} |
132 | 133 |
|