Browse code

DCA: break out lfe_interpolation_fir() inner loops into a separate function

Calling the inner loops through a function pointer in DCADSPContext enables SIMD-optimised versions of the new function.

Originally committed as revision 22861 to svn://svn.ffmpeg.org/ffmpeg/trunk

Måns Rullgård authored on 2010/04/13 05:45:25
Showing 4 changed files
... ...
@@ -87,7 +87,7 @@ OBJS-$(CONFIG_CLJR_ENCODER)            += cljr.o
87 87
 OBJS-$(CONFIG_COOK_DECODER)            += cook.o
88 88
 OBJS-$(CONFIG_CSCD_DECODER)            += cscd.o
89 89
 OBJS-$(CONFIG_CYUV_DECODER)            += cyuv.o
90
-OBJS-$(CONFIG_DCA_DECODER)             += dca.o synth_filter.o
90
+OBJS-$(CONFIG_DCA_DECODER)             += dca.o synth_filter.o dcadsp.o
91 91
 OBJS-$(CONFIG_DNXHD_DECODER)           += dnxhddec.o dnxhddata.o
92 92
 OBJS-$(CONFIG_DNXHD_ENCODER)           += dnxhdenc.o dnxhddata.o       \
93 93
                                           mpegvideo_enc.o motion_est.o \
... ...
@@ -41,6 +41,7 @@
41 41
 #include "dcahuff.h"
42 42
 #include "dca.h"
43 43
 #include "synth_filter.h"
44
+#include "dcadsp.h"
44 45
 
45 46
 //#define TRACE
46 47
 
... ...
@@ -256,6 +257,7 @@ typedef struct {
256 256
     DSPContext dsp;
257 257
     FFTContext imdct;
258 258
     SynthFilterContext synth;
259
+    DCADSPContext dcadsp;
259 260
 } DCAContext;
260 261
 
261 262
 static const uint16_t dca_vlc_offs[] = {
... ...
@@ -788,7 +790,7 @@ static void qmf_32_subbands(DCAContext * s, int chans,
788 788
     }
789 789
 }
790 790
 
791
-static void lfe_interpolation_fir(int decimation_select,
791
+static void lfe_interpolation_fir(DCAContext *s, int decimation_select,
792 792
                                   int num_deci_sample, float *samples_in,
793 793
                                   float *samples_out, float scale,
794 794
                                   float bias)
... ...
@@ -801,7 +803,7 @@ static void lfe_interpolation_fir(int decimation_select,
801 801
      * samples_out: An array holding interpolated samples
802 802
      */
803 803
 
804
-    int decifactor, k, j;
804
+    int decifactor;
805 805
     const float *prCoeff;
806 806
     int deciindex;
807 807
 
... ...
@@ -815,25 +817,10 @@ static void lfe_interpolation_fir(int decimation_select,
815 815
     }
816 816
     /* Interpolation */
817 817
     for (deciindex = 0; deciindex < num_deci_sample; deciindex++) {
818
-        float *samples_out2 = samples_out + decifactor;
819
-        const float *cf0 = prCoeff;
820
-        const float *cf1 = prCoeff + 256;
821
-
822
-        /* One decimated sample generates 2*decifactor interpolated ones */
823
-        for (k = 0; k < decifactor; k++) {
824
-            float v0 = 0.0;
825
-            float v1 = 0.0;
826
-            for (j = 0; j < 256 / decifactor; j++) {
827
-                float s = samples_in[-j];
828
-                v0 += s * *cf0++;
829
-                v1 += s * *--cf1;
830
-            }
831
-            *samples_out++  = (v0 * scale) + bias;
832
-            *samples_out2++ = (v1 * scale) + bias;
833
-        }
834
-
818
+        s->dcadsp.lfe_fir(samples_out, samples_in, prCoeff, decifactor,
819
+                          scale, bias);
835 820
         samples_in++;
836
-        samples_out += decifactor;
821
+        samples_out += 2 * decifactor;
837 822
     }
838 823
 }
839 824
 
... ...
@@ -1083,7 +1070,7 @@ static int dca_subsubframe(DCAContext * s)
1083 1083
     if (s->output & DCA_LFE) {
1084 1084
         int lfe_samples = 2 * s->lfe * s->subsubframes;
1085 1085
 
1086
-        lfe_interpolation_fir(s->lfe, 2 * s->lfe,
1086
+        lfe_interpolation_fir(s, s->lfe, 2 * s->lfe,
1087 1087
                               s->lfe_data + lfe_samples +
1088 1088
                               2 * s->lfe * subsubframe,
1089 1089
                               &s->samples[256 * dca_lfe_index[s->amode]],
... ...
@@ -1313,6 +1300,7 @@ static av_cold int dca_decode_init(AVCodecContext * avctx)
1313 1313
     dsputil_init(&s->dsp, avctx);
1314 1314
     ff_mdct_init(&s->imdct, 6, 1, 1.0);
1315 1315
     ff_synth_filter_init(&s->synth);
1316
+    ff_dcadsp_init(&s->dcadsp);
1316 1317
 
1317 1318
     for(i = 0; i < 6; i++)
1318 1319
         s->samples_chanptr[i] = s->samples + i * 256;
1319 1320
new file mode 100644
... ...
@@ -0,0 +1,49 @@
0
+/*
1
+ * Copyright (c) 2004 Gildas Bazin
2
+ * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
3
+ *
4
+ * This file is part of FFmpeg.
5
+ *
6
+ * FFmpeg is free software; you can redistribute it and/or
7
+ * modify it under the terms of the GNU Lesser General Public
8
+ * License as published by the Free Software Foundation; either
9
+ * version 2.1 of the License, or (at your option) any later version.
10
+ *
11
+ * FFmpeg is distributed in the hope that it will be useful,
12
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
+ * Lesser General Public License for more details.
15
+ *
16
+ * You should have received a copy of the GNU Lesser General Public
17
+ * License along with FFmpeg; if not, write to the Free Software
18
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
+ */
20
+
21
+#include "dcadsp.h"
22
+/**
+ * Plain C version of the LFE interpolation FIR kernel.
+ *
+ * Writes 2 * decifactor floats to out: out[0 .. decifactor-1] are built
+ * from a forward walk over coefs, out[decifactor .. 2*decifactor-1] from
+ * a backward walk starting at coefs[255].
+ *
+ * out        destination for the 2 * decifactor results
+ * in         in[0] and the samples stored before it, down to
+ *            in[-(256 / decifactor - 1)], are read
+ * coefs      coefficient table; coefs[0] .. coefs[255] are all read when
+ *            decifactor divides 256 evenly
+ * decifactor number of outputs per half; each output sums
+ *            256 / decifactor products
+ * scale      factor applied to every accumulated sum
+ * bias       offset added to every result after scaling
+ */
23
+static void dca_lfe_fir_c(float *out, const float *in, const float *coefs,
24
+                          int decifactor, float scale, float bias)
25
+{
26
+    float *out2 = out + decifactor;   /* second half of the output */
27
+    const float *cf0 = coefs;         /* read forwards with post-increment */
28
+    const float *cf1 = coefs + 256;   /* read backwards with pre-decrement */
29
+    int j, k;
30
+
31
+    /* One decimated sample generates 2*decifactor interpolated ones.
+     * cf0 and cf1 are not reset between iterations of k, so each k
+     * consumes the next 256 / decifactor coefficients from either end
+     * of the table. */
32
+    for (k = 0; k < decifactor; k++) {
33
+        float v0 = 0.0;
34
+        float v1 = 0.0;
35
+        for (j = 0; j < 256 / decifactor; j++) {
36
+            float s = in[-j];         /* j-th sample before in[0] */
37
+            v0 += s * *cf0++;
38
+            v1 += s * *--cf1;
39
+        }
40
+        *out++  = (v0 * scale) + bias;
41
+        *out2++ = (v1 * scale) + bias;
42
+    }
43
+}
44
+/**
+ * Initialise a DCADSPContext by pointing lfe_fir at the C implementation
+ * defined in this file.
+ */
45
+void ff_dcadsp_init(DCADSPContext *s)
46
+{
47
+    s->lfe_fir = dca_lfe_fir_c;
48
+}
0 49
new file mode 100644
... ...
@@ -0,0 +1,29 @@
0
+/*
1
+ * This file is part of FFmpeg.
2
+ *
3
+ * FFmpeg is free software; you can redistribute it and/or
4
+ * modify it under the terms of the GNU Lesser General Public
5
+ * License as published by the Free Software Foundation; either
6
+ * version 2.1 of the License, or (at your option) any later version.
7
+ *
8
+ * FFmpeg is distributed in the hope that it will be useful,
9
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
+ * Lesser General Public License for more details.
12
+ *
13
+ * You should have received a copy of the GNU Lesser General Public
14
+ * License along with FFmpeg; if not, write to the Free Software
15
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+ */
17
+
18
+#ifndef AVCODEC_DCADSP_H
19
+#define AVCODEC_DCADSP_H
20
+/**
+ * Table of DCA DSP routines, called through function pointers.
+ *
+ * lfe_fir: LFE interpolation FIR kernel. Takes an output buffer, an input
+ * sample pointer, a coefficient table, the decimation factor, and a scale
+ * and bias for the results.
+ */
21
+typedef struct DCADSPContext {
22
+    void (*lfe_fir)(float *out, const float *in, const float *coefs,
23
+                    int decifactor, float scale, float bias);
24
+} DCADSPContext;
25
+/**
+ * Set the function pointers of s. The definition added in dcadsp.c
+ * installs the C version of lfe_fir.
+ */
26
+void ff_dcadsp_init(DCADSPContext *s);
27
+
28
+#endif /* AVCODEC_DCADSP_H */