Browse code

Convert floating-point MDCT coefficients to 24-bit fixed-point all at once instead of doing it separately in 2 different functions. This makes float AC-3 encoding approx. 3-7% faster overall. Also, the coefficient conversion can now be easily SIMD-optimized.

Originally committed as revision 26232 to svn://svn.ffmpeg.org/ffmpeg/trunk

Justin Ruggles authored on 2011/01/06 05:35:36
Showing 5 changed files
... ...
@@ -62,6 +62,7 @@
62 62
 typedef struct AC3Block {
63 63
     uint8_t  **bap;                             ///< bit allocation pointers (bap)
64 64
     CoefType **mdct_coef;                       ///< MDCT coefficients
65
+    int32_t  **fixed_coef;                      ///< fixed-point MDCT coefficients
65 66
     uint8_t  **exp;                             ///< original exponents
66 67
     uint8_t  **grouped_exp;                     ///< grouped exponents
67 68
     int16_t  **psd;                             ///< psd per frequency bin
... ...
@@ -128,6 +129,7 @@ typedef struct AC3EncodeContext {
128 128
     uint8_t *bap_buffer;
129 129
     uint8_t *bap1_buffer;
130 130
     CoefType *mdct_coef_buffer;
131
+    int32_t *fixed_coef_buffer;
131 132
     uint8_t *exp_buffer;
132 133
     uint8_t *grouped_exp_buffer;
133 134
     int16_t *psd_buffer;
... ...
@@ -153,6 +155,8 @@ static void apply_window(SampleType *output, const SampleType *input,
153 153
 
154 154
 static int normalize_samples(AC3EncodeContext *s);
155 155
 
156
+static void scale_coefficients(AC3EncodeContext *s);
157
+
156 158
 
157 159
 /**
158 160
  * LUT for number of exponent groups.
... ...
@@ -286,11 +290,11 @@ static void extract_exponents(AC3EncodeContext *s)
286 286
         for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
287 287
             AC3Block *block = &s->blocks[blk];
288 288
             uint8_t *exp   = block->exp[ch];
289
-            CoefType *coef = block->mdct_coef[ch];
289
+            int32_t *coef = block->fixed_coef[ch];
290 290
             int exp_shift  = block->exp_shift[ch];
291 291
             for (i = 0; i < AC3_MAX_COEFS; i++) {
292 292
                 int e;
293
-                int v = abs(SCALE_COEF(coef[i]));
293
+                int v = abs(coef[i]);
294 294
                 if (v == 0)
295 295
                     e = 24;
296 296
                 else {
... ...
@@ -1017,7 +1021,7 @@ static inline int asym_quant(int c, int e, int qbits)
1017 1017
 /**
1018 1018
  * Quantize a set of mantissas for a single channel in a single block.
1019 1019
  */
1020
-static void quantize_mantissas_blk_ch(AC3EncodeContext *s, CoefType *mdct_coef,
1020
+static void quantize_mantissas_blk_ch(AC3EncodeContext *s, int32_t *fixed_coef,
1021 1021
                                       int8_t exp_shift, uint8_t *exp,
1022 1022
                                       uint8_t *bap, uint16_t *qmant, int n)
1023 1023
 {
... ...
@@ -1025,7 +1029,7 @@ static void quantize_mantissas_blk_ch(AC3EncodeContext *s, CoefType *mdct_coef,
1025 1025
 
1026 1026
     for (i = 0; i < n; i++) {
1027 1027
         int v;
1028
-        int c = SCALE_COEF(mdct_coef[i]);
1028
+        int c = fixed_coef[i];
1029 1029
         int e = exp[i] - exp_shift;
1030 1030
         int b = bap[i];
1031 1031
         switch (b) {
... ...
@@ -1122,7 +1126,7 @@ static void quantize_mantissas(AC3EncodeContext *s)
1122 1122
         s->qmant1_ptr = s->qmant2_ptr = s->qmant4_ptr = NULL;
1123 1123
 
1124 1124
         for (ch = 0; ch < s->channels; ch++) {
1125
-            quantize_mantissas_blk_ch(s, block->mdct_coef[ch], block->exp_shift[ch],
1125
+            quantize_mantissas_blk_ch(s, block->fixed_coef[ch], block->exp_shift[ch],
1126 1126
                                       block->exp[ch], block->bap[ch],
1127 1127
                                       block->qmant[ch], s->nb_coefs[ch]);
1128 1128
         }
... ...
@@ -1390,6 +1394,8 @@ static int ac3_encode_frame(AVCodecContext *avctx, unsigned char *frame,
1390 1390
 
1391 1391
     apply_mdct(s);
1392 1392
 
1393
+    scale_coefficients(s);
1394
+
1393 1395
     process_exponents(s);
1394 1396
 
1395 1397
     ret = compute_bit_allocation(s);
... ...
@@ -1420,6 +1426,7 @@ static av_cold int ac3_encode_close(AVCodecContext *avctx)
1420 1420
     av_freep(&s->bap_buffer);
1421 1421
     av_freep(&s->bap1_buffer);
1422 1422
     av_freep(&s->mdct_coef_buffer);
1423
+    av_freep(&s->fixed_coef_buffer);
1423 1424
     av_freep(&s->exp_buffer);
1424 1425
     av_freep(&s->grouped_exp_buffer);
1425 1426
     av_freep(&s->psd_buffer);
... ...
@@ -1430,6 +1437,7 @@ static av_cold int ac3_encode_close(AVCodecContext *avctx)
1430 1430
         AC3Block *block = &s->blocks[blk];
1431 1431
         av_freep(&block->bap);
1432 1432
         av_freep(&block->mdct_coef);
1433
+        av_freep(&block->fixed_coef);
1433 1434
         av_freep(&block->exp);
1434 1435
         av_freep(&block->grouped_exp);
1435 1436
         av_freep(&block->psd);
... ...
@@ -1639,6 +1647,26 @@ static av_cold int allocate_buffers(AVCodecContext *avctx)
1639 1639
         }
1640 1640
     }
1641 1641
 
1642
+    if (CONFIG_AC3ENC_FLOAT) {
1643
+        FF_ALLOC_OR_GOTO(avctx, s->fixed_coef_buffer, AC3_MAX_BLOCKS * s->channels *
1644
+                         AC3_MAX_COEFS * sizeof(*s->fixed_coef_buffer), alloc_fail);
1645
+        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
1646
+            AC3Block *block = &s->blocks[blk];
1647
+            FF_ALLOCZ_OR_GOTO(avctx, block->fixed_coef, s->channels *
1648
+                              sizeof(*block->fixed_coef), alloc_fail);
1649
+            for (ch = 0; ch < s->channels; ch++)
1650
+                block->fixed_coef[ch] = &s->fixed_coef_buffer[AC3_MAX_COEFS * (blk * s->channels + ch)];
1651
+        }
1652
+    } else {
1653
+        for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
1654
+            AC3Block *block = &s->blocks[blk];
1655
+            FF_ALLOCZ_OR_GOTO(avctx, block->fixed_coef, s->channels *
1656
+                              sizeof(*block->fixed_coef), alloc_fail);
1657
+            for (ch = 0; ch < s->channels; ch++)
1658
+                block->fixed_coef[ch] = (int32_t *)block->mdct_coef[ch];
1659
+        }
1660
+    }
1661
+
1642 1662
     return 0;
1643 1663
 alloc_fail:
1644 1664
     return AVERROR(ENOMEM);
... ...
@@ -319,6 +319,17 @@ static int normalize_samples(AC3EncodeContext *s)
319 319
 }
320 320
 
321 321
 
322
+/**
323
+ * Scale MDCT coefficients from float to fixed-point.
324
+ */
325
+static void scale_coefficients(AC3EncodeContext *s)
326
+{
327
+    /* scaling/conversion is obviously not needed for the fixed-point encoder
328
+       since the coefficients are already fixed-point. */
329
+    return;
330
+}
331
+
332
+
322 333
 #ifdef TEST
323 334
 /*************************************************************************/
324 335
 /* TEST */
... ...
@@ -35,8 +35,6 @@
35 35
 typedef int16_t SampleType;
36 36
 typedef int32_t CoefType;
37 37
 
38
-#define SCALE_COEF(a) (a)
39
-
40 38
 
41 39
 /**
42 40
  * Complex number.
... ...
@@ -102,6 +102,17 @@ static int normalize_samples(AC3EncodeContext *s)
102 102
 }
103 103
 
104 104
 
105
+/**
106
+ * Scale MDCT coefficients from float to 24-bit fixed-point.
107
+ */
108
+static void scale_coefficients(AC3EncodeContext *s)
109
+{
110
+    int i;
111
+    for (i = 0; i < AC3_MAX_COEFS * AC3_MAX_BLOCKS * s->channels; i++)
112
+        s->fixed_coef_buffer[i] = SCALE_FLOAT(s->mdct_coef_buffer[i], 24);
113
+}
114
+
115
+
105 116
 AVCodec ac3_encoder = {
106 117
     "ac3",
107 118
     AVMEDIA_TYPE_AUDIO,
... ...
@@ -35,8 +35,6 @@
35 35
 typedef float SampleType;
36 36
 typedef float CoefType;
37 37
 
38
-#define SCALE_COEF(a) SCALE_FLOAT((a), 24)
39
-
40 38
 
41 39
 typedef struct AC3MDCTContext {
42 40
     const float *window;    ///< MDCT window function