Add a special-case code path for second-order filters (FILTER_O2); this is 40% faster in ff_iir_filter_flt() when c->order == 2.
(cherry picked from commit 37cb3eb53449ccefbbe8ea7dc5e66f9036aafe6e)
... | ... |
@@ -256,11 +256,29 @@ av_cold struct FFIIRFilterState* ff_iir_filter_init_state(int order) |
256 | 256 |
} \ |
257 | 257 |
} |
258 | 258 |
|
259 |
+#define FILTER_O2(type, fmt) { \ |
|
260 |
+ int i; \ |
|
261 |
+ const type *src0 = src; \ |
|
262 |
+ type *dst0 = dst; \ |
|
263 |
+ for (i = 0; i < size; i++) { \ |
|
264 |
+ float in = *src0 * c->gain + \ |
|
265 |
+ s->x[0] * c->cy[0] + \ |
|
266 |
+ s->x[1] * c->cy[1]; \ |
|
267 |
+ CONV_##fmt(*dst0, s->x[0] + in + s->x[1] * c->cx[1]) \ |
|
268 |
+ s->x[0] = s->x[1]; \ |
|
269 |
+ s->x[1] = in; \ |
|
270 |
+ src0 += sstep; \ |
|
271 |
+ dst0 += dstep; \ |
|
272 |
+ } \ |
|
273 |
+} |
|
274 |
+ |
|
259 | 275 |
void ff_iir_filter(const struct FFIIRFilterCoeffs *c, |
260 | 276 |
struct FFIIRFilterState *s, int size, |
261 | 277 |
const int16_t *src, int sstep, int16_t *dst, int dstep) |
262 | 278 |
{ |
263 |
- if (c->order == 4) { |
|
279 |
+ if (c->order == 2) { |
|
280 |
+ FILTER_O2(int16_t, S16) |
|
281 |
+ } else if (c->order == 4) { |
|
264 | 282 |
FILTER_BW_O4(int16_t, S16) |
265 | 283 |
} else { |
266 | 284 |
FILTER_DIRECT_FORM_II(int16_t, S16) |
... | ... |
@@ -271,7 +289,9 @@ void ff_iir_filter_flt(const struct FFIIRFilterCoeffs *c, |
271 | 289 |
struct FFIIRFilterState *s, int size, |
272 | 290 |
const float *src, int sstep, float *dst, int dstep) |
273 | 291 |
{ |
274 |
- if (c->order == 4) { |
|
292 |
+ if (c->order == 2) { |
|
293 |
+ FILTER_O2(float, FLT) |
|
294 |
+ } else if (c->order == 4) { |
|
275 | 295 |
FILTER_BW_O4(float, FLT) |
276 | 296 |
} else { |
277 | 297 |
FILTER_DIRECT_FORM_II(float, FLT) |