Browse code

More OKed AAC decoder hunks

Originally committed as revision 14774 to svn://svn.ffmpeg.org/ffmpeg/trunk

Robert Swain authored on 2008/08/15 17:01:31
Showing 4 changed files
... ...
@@ -90,10 +90,6 @@
90 90
 #include <math.h>
91 91
 #include <string.h>
92 92
 
93
-#ifndef CONFIG_HARDCODED_TABLES
94
-    static float ff_aac_pow2sf_tab[316];
95
-#endif /* CONFIG_HARDCODED_TABLES */
96
-
97 93
 static VLC vlc_scalefactors;
98 94
 static VLC vlc_spectral[11];
99 95
 
... ...
@@ -413,6 +409,12 @@ static av_cold int aac_decode_init(AVCodecContext * avccontext) {
413 413
 
414 414
     ff_mdct_init(&ac->mdct, 11, 1);
415 415
     ff_mdct_init(&ac->mdct_small, 8, 1);
416
+    // window initialization
417
+    ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
418
+    ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
419
+    ff_sine_window_init(ff_sine_1024, 1024);
420
+    ff_sine_window_init(ff_sine_128, 128);
421
+
416 422
     return 0;
417 423
 }
418 424
 
... ...
@@ -446,7 +448,27 @@ static int decode_ics_info(AACContext * ac, IndividualChannelStream * ics, GetBi
446 446
     ics->use_kb_window[0] = get_bits1(gb);
447 447
     ics->num_window_groups = 1;
448 448
     ics->group_len[0] = 1;
449
-
449
+    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
450
+        int i;
451
+        ics->max_sfb = get_bits(gb, 4);
452
+        for (i = 0; i < 7; i++) {
453
+            if (get_bits1(gb)) {
454
+                ics->group_len[ics->num_window_groups-1]++;
455
+            } else {
456
+                ics->num_window_groups++;
457
+                ics->group_len[ics->num_window_groups-1] = 1;
458
+            }
459
+        }
460
+        ics->num_windows   = 8;
461
+        ics->swb_offset    =      swb_offset_128[ac->m4ac.sampling_index];
462
+        ics->num_swb       =  ff_aac_num_swb_128[ac->m4ac.sampling_index];
463
+        ics->tns_max_bands =   tns_max_bands_128[ac->m4ac.sampling_index];
464
+    } else {
465
+        ics->max_sfb       = get_bits(gb, 6);
466
+        ics->num_windows   = 1;
467
+        ics->swb_offset    =     swb_offset_1024[ac->m4ac.sampling_index];
468
+        ics->num_swb       = ff_aac_num_swb_1024[ac->m4ac.sampling_index];
469
+        ics->tns_max_bands =  tns_max_bands_1024[ac->m4ac.sampling_index];
450 470
         if (get_bits1(gb)) {
451 471
             av_log_missing_feature(ac->avccontext, "Predictor bit set but LTP is", 1);
452 472
             memset(ics, 0, sizeof(IndividualChannelStream));
... ...
@@ -496,6 +518,10 @@ static int decode_band_types(AACContext * ac, enum BandType band_type[120],
496 496
                     sect_len, ics->max_sfb);
497 497
                 return -1;
498 498
             }
499
+            for (; k < sect_len; k++) {
500
+                band_type        [idx]   = sect_band_type;
501
+                band_type_run_end[idx++] = sect_len;
502
+            }
499 503
         }
500 504
     }
501 505
     return 0;
... ...
@@ -597,6 +623,106 @@ static void decode_mid_side_stereo(ChannelElement * cpe, GetBitContext * gb,
597 597
 }
598 598
 
599 599
 /**
600
+ * Decode spectral data; reference: table 4.50.
601
+ * Dequantize and scale spectral data; reference: 4.6.3.3.
602
+ *
603
+ * @param   coef            array of dequantized, scaled spectral data
604
+ * @param   sf              array of scalefactors or intensity stereo positions
605
+ * @param   pulse_present   set if pulses are present
606
+ * @param   pulse           pointer to pulse data struct
607
+ * @param   band_type       array of the used band type
608
+ *
609
+ * @return  Returns error status. 0 - OK, !0 - error
610
+ */
611
+static int decode_spectrum_and_dequant(AACContext * ac, float coef[1024], GetBitContext * gb, float sf[120],
612
+        int pulse_present, const Pulse * pulse, const IndividualChannelStream * ics, enum BandType band_type[120]) {
613
+    int i, k, g, idx = 0;
614
+    const int c = 1024/ics->num_windows;
615
+    const uint16_t * offsets = ics->swb_offset;
616
+    float *coef_base = coef;
617
+
618
+    for (g = 0; g < ics->num_windows; g++)
619
+        memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float)*(c - offsets[ics->max_sfb]));
620
+
621
+    for (g = 0; g < ics->num_window_groups; g++) {
622
+        for (i = 0; i < ics->max_sfb; i++, idx++) {
623
+            const int cur_band_type = band_type[idx];
624
+            const int dim = cur_band_type >= FIRST_PAIR_BT ? 2 : 4;
625
+            const int is_cb_unsigned = IS_CODEBOOK_UNSIGNED(cur_band_type);
626
+            int group;
627
+            if (cur_band_type == ZERO_BT) {
628
+                for (group = 0; group < ics->group_len[g]; group++) {
629
+                    memset(coef + group * 128 + offsets[i], 0, (offsets[i+1] - offsets[i])*sizeof(float));
630
+                }
631
+            }else if (cur_band_type == NOISE_BT) {
632
+                const float scale = sf[idx] / ((offsets[i+1] - offsets[i]) * PNS_MEAN_ENERGY);
633
+                for (group = 0; group < ics->group_len[g]; group++) {
634
+                    for (k = offsets[i]; k < offsets[i+1]; k++) {
635
+                        ac->random_state  = lcg_random(ac->random_state);
636
+                        coef[group*128+k] = ac->random_state * scale;
637
+                    }
638
+                }
639
+            }else if (cur_band_type != INTENSITY_BT2 && cur_band_type != INTENSITY_BT) {
640
+                for (group = 0; group < ics->group_len[g]; group++) {
641
+                    for (k = offsets[i]; k < offsets[i+1]; k += dim) {
642
+                        const int index = get_vlc2(gb, vlc_spectral[cur_band_type - 1].table, 6, 3);
643
+                        const int coef_tmp_idx = (group << 7) + k;
644
+                        const float *vq_ptr;
645
+                        int j;
646
+                        if(index >= ff_aac_spectral_sizes[cur_band_type - 1]) {
647
+                            av_log(ac->avccontext, AV_LOG_ERROR,
648
+                                "Read beyond end of ff_aac_codebook_vectors[%d][]. index %d >= %d\n",
649
+                                cur_band_type - 1, index, ff_aac_spectral_sizes[cur_band_type - 1]);
650
+                            return -1;
651
+                        }
652
+                        vq_ptr = &ff_aac_codebook_vectors[cur_band_type - 1][index * dim];
653
+                        if (is_cb_unsigned) {
654
+                            for (j = 0; j < dim; j++)
655
+                                if (vq_ptr[j])
656
+                                    coef[coef_tmp_idx + j] = 1 - 2*(int)get_bits1(gb);
657
+                        }else {
658
+                            for (j = 0; j < dim; j++)
659
+                                coef[coef_tmp_idx + j] = 1.0f;
660
+                        }
661
+                        if (cur_band_type == ESC_BT) {
662
+                            for (j = 0; j < 2; j++) {
663
+                                if (vq_ptr[j] == 64.0f) {
664
+                                    int n = 4;
665
+                                    /* The total length of escape_sequence must be < 22 bits according
666
+                                       to the specification (i.e. max is 11111111110xxxxxxxxxx). */
667
+                                    while (get_bits1(gb) && n < 15) n++;
668
+                                    if(n == 15) {
669
+                                        av_log(ac->avccontext, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
670
+                                        return -1;
671
+                                    }
672
+                                    n = (1<<n) + get_bits(gb, n);
673
+                                    coef[coef_tmp_idx + j] *= cbrtf(fabsf(n)) * n;
674
+                                }else
675
+                                    coef[coef_tmp_idx + j] *= vq_ptr[j];
676
+                            }
677
+                        }else
678
+                            for (j = 0; j < dim; j++)
679
+                                coef[coef_tmp_idx + j] *= vq_ptr[j];
680
+                        for (j = 0; j < dim; j++)
681
+                            coef[coef_tmp_idx + j] *= sf[idx];
682
+                    }
683
+                }
684
+            }
685
+        }
686
+        coef += ics->group_len[g]<<7;
687
+    }
688
+
689
+    if (pulse_present) {
690
+        for(i = 0; i < pulse->num_pulse; i++){
691
+            float co  = coef_base[ pulse->pos[i] ];
692
+            float ico = co / sqrtf(sqrtf(fabsf(co))) + pulse->amp[i];
693
+            coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico;
694
+        }
695
+    }
696
+    return 0;
697
+}
698
+
699
+/**
600 700
  * Decode an individual_channel_stream payload; reference: table 4.44.
601 701
  *
602 702
  * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
... ...
@@ -651,6 +777,72 @@ static int decode_ics(AACContext * ac, SingleChannelElement * sce, GetBitContext
651 651
 }
652 652
 
653 653
 /**
654
+ * Mid/Side stereo decoding; reference: 4.6.8.1.3.
655
+ */
656
+static void apply_mid_side_stereo(ChannelElement * cpe) {
657
+    const IndividualChannelStream * ics = &cpe->ch[0].ics;
658
+    float *ch0 = cpe->ch[0].coeffs;
659
+    float *ch1 = cpe->ch[1].coeffs;
660
+    int g, i, k, group, idx = 0;
661
+    const uint16_t * offsets = ics->swb_offset;
662
+    for (g = 0; g < ics->num_window_groups; g++) {
663
+        for (i = 0; i < ics->max_sfb; i++, idx++) {
664
+            if (cpe->ms_mask[idx] &&
665
+                cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) {
666
+                for (group = 0; group < ics->group_len[g]; group++) {
667
+                    for (k = offsets[i]; k < offsets[i+1]; k++) {
668
+                        float tmp = ch0[group*128 + k] - ch1[group*128 + k];
669
+                        ch0[group*128 + k] += ch1[group*128 + k];
670
+                        ch1[group*128 + k] = tmp;
671
+                    }
672
+                }
673
+            }
674
+        }
675
+        ch0 += ics->group_len[g]*128;
676
+        ch1 += ics->group_len[g]*128;
677
+    }
678
+}
679
+
680
+/**
681
+ * Intensity stereo decoding; reference: 4.6.8.2.3.
682
+ *
683
+ * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
684
+ *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
685
+ *                      [3] reserved for scalable AAC
686
+ */
687
+static void apply_intensity_stereo(ChannelElement * cpe, int ms_present) {
688
+    const IndividualChannelStream * ics = &cpe->ch[1].ics;
689
+    SingleChannelElement * sce1 = &cpe->ch[1];
690
+    float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
691
+    const uint16_t * offsets = ics->swb_offset;
692
+    int g, group, i, k, idx = 0;
693
+    int c;
694
+    float scale;
695
+    for (g = 0; g < ics->num_window_groups; g++) {
696
+        for (i = 0; i < ics->max_sfb;) {
697
+            if (sce1->band_type[idx] == INTENSITY_BT || sce1->band_type[idx] == INTENSITY_BT2) {
698
+                const int bt_run_end = sce1->band_type_run_end[idx];
699
+                for (; i < bt_run_end; i++, idx++) {
700
+                    c = -1 + 2 * (sce1->band_type[idx] - 14);
701
+                    if (ms_present)
702
+                        c *= 1 - 2 * cpe->ms_mask[idx];
703
+                    scale = c * sce1->sf[idx];
704
+                    for (group = 0; group < ics->group_len[g]; group++)
705
+                        for (k = offsets[i]; k < offsets[i+1]; k++)
706
+                            coef1[group*128 + k] = scale * coef0[group*128 + k];
707
+                }
708
+            } else {
709
+                int bt_run_end = sce1->band_type_run_end[idx];
710
+                idx += bt_run_end - i;
711
+                i    = bt_run_end;
712
+            }
713
+        }
714
+        coef0 += ics->group_len[g]*128;
715
+        coef1 += ics->group_len[g]*128;
716
+    }
717
+}
718
+
719
+/**
654 720
  * Decode a channel_pair_element; reference: table 4.4.
655 721
  *
656 722
  * @param   elem_id Identifies the instance of a syntax element.
... ...
@@ -688,6 +880,21 @@ static int decode_cpe(AACContext * ac, GetBitContext * gb, int elem_id) {
688 688
     return 0;
689 689
 }
690 690
 
691
+/**
692
+ * Decode coupling_channel_element; reference: table 4.8.
693
+ *
694
+ * @param   che     Channel element whose coupling data (che->coup) is read from the bitstream.
695
+ *
696
+ * @return  Returns error status. 0 - OK, !0 - error
697
+ */
698
+static int decode_cce(AACContext * ac, GetBitContext * gb, ChannelElement * che) {
699
+    int num_gain = 0;
700
+    int c, g, sfb, ret, idx = 0;
701
+    int sign;
702
+    float scale;
703
+    SingleChannelElement * sce = &che->ch[0];
704
+    ChannelCoupling * coup     = &che->coup;
705
+
691 706
     coup->coupling_point = 2*get_bits1(gb);
692 707
     coup->num_coupled = get_bits(gb, 3);
693 708
     for (c = 0; c <= coup->num_coupled; c++) {
... ...
@@ -966,6 +1173,58 @@ static void apply_independent_coupling(AACContext * ac, SingleChannelElement * s
966 966
         sce->ret[i] += cc->coup.gain[index][0] * (cc->ch[0].ret[i] - ac->add_bias);
967 967
 }
968 968
 
969
+/**
970
+ * Channel coupling transformation interface.
971
+ *
972
+ * @param   cc      coupling channel element whose coupled targets are processed
973
+ * @param   apply_coupling_method   pointer to (in)dependent coupling function
974
+ */
975
+static void apply_channel_coupling(AACContext * ac, ChannelElement * cc,
976
+        void (*apply_coupling_method)(AACContext * ac, SingleChannelElement * sce, ChannelElement * cc, int index))
977
+{
978
+    int c;
979
+    int index = 0;
980
+    ChannelCoupling * coup = &cc->coup;
981
+    for (c = 0; c <= coup->num_coupled; c++) {
982
+        if (ac->che[coup->type[c]][coup->id_select[c]]) {
983
+            if (coup->ch_select[c] != 2) {
984
+                apply_coupling_method(ac, &ac->che[coup->type[c]][coup->id_select[c]]->ch[0], cc, index);
985
+                if (coup->ch_select[c] != 0)
986
+                    index++;
987
+            }
988
+            if (coup->ch_select[c] != 1)
989
+                apply_coupling_method(ac, &ac->che[coup->type[c]][coup->id_select[c]]->ch[1], cc, index++);
990
+        } else {
991
+            av_log(ac->avccontext, AV_LOG_ERROR,
992
+                   "coupling target %sE[%d] not available\n",
993
+                   coup->type[c] == TYPE_CPE ? "CP" : "SC", coup->id_select[c]);
994
+            break;
995
+        }
996
+    }
997
+}
998
+
999
+/**
1000
+ * Convert spectral data to float samples, applying all supported tools as appropriate.
1001
+ */
1002
+static void spectral_to_sample(AACContext * ac) {
1003
+    int i, type;
1004
+    for (i = 0; i < MAX_ELEM_ID; i++) {
1005
+        for(type = 0; type < 4; type++) {
1006
+            ChannelElement *che = ac->che[type][i];
1007
+            if(che) {
1008
+                if(che->coup.coupling_point == BEFORE_TNS)
1009
+                    apply_channel_coupling(ac, che, apply_dependent_coupling);
1010
+                if(che->ch[0].tns.present)
1011
+                    apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
1012
+                if(che->ch[1].tns.present)
1013
+                    apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
1014
+                if(che->coup.coupling_point == BETWEEN_TNS_AND_IMDCT)
1015
+                    apply_channel_coupling(ac, che, apply_dependent_coupling);
1016
+                imdct_and_windowing(ac, &che->ch[0]);
1017
+                if(type == TYPE_CPE)
1018
+                    imdct_and_windowing(ac, &che->ch[1]);
1019
+                if(che->coup.coupling_point == AFTER_IMDCT)
1020
+                    apply_channel_coupling(ac, che, apply_independent_coupling);
969 1021
             }
970 1022
         }
971 1023
     }
... ...
@@ -45,6 +45,9 @@
45 45
 #define MAX_CHANNELS 64
46 46
 #define MAX_ELEM_ID 16
47 47
 
48
+#define TNS_MAX_ORDER 20
49
+#define PNS_MEAN_ENERGY 3719550720.0f // sqrt(3.0) * (1<<31)
50
+
48 51
 enum AudioObjectType {
49 52
     AOT_NULL,
50 53
                                // Support?                Name
... ...
@@ -32,6 +32,9 @@
32 32
 
33 33
 #include <stdint.h>
34 34
 
35
+DECLARE_ALIGNED(16, float,  ff_aac_kbd_long_1024[1024]);
36
+DECLARE_ALIGNED(16, float,  ff_aac_kbd_short_128[128]);
37
+
35 38
 const uint8_t ff_aac_num_swb_1024[] = {
36 39
     41, 41, 47, 49, 49, 51, 47, 47, 43, 43, 43, 40
37 40
 };
... ...
@@ -983,4 +986,8 @@ const float ff_aac_pow2sf_tab[316] = {
983 983
     2.68435456e+08, 3.19225354e+08, 3.79625062e+08, 4.51452825e+08,
984 984
 };
985 985
 
986
+#else
987
+
988
+float ff_aac_pow2sf_tab[316];
989
+
986 990
 #endif /* CONFIG_HARDCODED_TABLES */
... ...
@@ -40,6 +40,13 @@
40 40
  * encoder.
41 41
  */
42 42
 
43
+/* @name window coefficients
44
+ * @{
45
+ */
46
+DECLARE_ALIGNED(16, extern float,  ff_aac_kbd_long_1024[1024]);
47
+DECLARE_ALIGNED(16, extern float,  ff_aac_kbd_short_128[128]);
48
+// @}
49
+
43 50
 /* @name number of scalefactor window bands for long and short transform windows respectively
44 51
  * @{
45 52
  */
... ...
@@ -58,6 +65,8 @@ extern const float *ff_aac_codebook_vectors[];
58 58
 
59 59
 #ifdef CONFIG_HARDCODED_TABLES
60 60
 extern const float ff_aac_pow2sf_tab[316];
61
+#else
62
+extern       float ff_aac_pow2sf_tab[316];
61 63
 #endif /* CONFIG_HARDCODED_TABLES */
62 64
 
63 65
 #endif /* FFMPEG_AACTAB_H */