Browse code

aacenc_tns: rework coefficient quantization and filter application

This commit reworks the TNS implementation into a hybrid of what
the specification says, what the decoder does, and what works best
in practice.

The filter application function was copied from the decoder and
modified so that it applies the inverse AR filter to the
coefficients. The LPC coefficients themselves are fed into the
same quantization expression that the specification says should
be used. However, no further processing is done; instead, they are
converted to the form that the decoder expects and are passed to
the compute_lpc_coefs function exactly the way the decoder does it.
This function does all conversions and returns the exact LPC
coefficients that the decoder will generate, which are then used
to filter the channel's coefficients.
Having the exact same coefficients in both the encoder and the decoder
is a must, since otherwise all of the SFBs over which the filter
is applied will be attenuated.

Despite this major rework, TNS might not work well on some audio
types at very low bitrates (e.g. below 90 kbps), as it can attenuate
some coefficients too much. Users who wish to use this tool are
advised to experiment with TNS at higher bitrates, or simply to
wait for the implementation to be improved.

Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>

Rostislav Pehlivanov authored on 2015/09/01 14:44:07
Showing 4 changed files
... ...
@@ -611,7 +611,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
611 611
                 if (s->options.tns && s->coder->search_for_tns)
612 612
                     s->coder->search_for_tns(s, sce);
613 613
                 if (s->options.tns && s->coder->apply_tns_filt)
614
-                    s->coder->apply_tns_filt(sce);
614
+                    s->coder->apply_tns_filt(s, sce);
615 615
                 if (sce->tns.present)
616 616
                     tns_mode = 1;
617 617
             }
... ...
@@ -63,7 +63,7 @@ typedef struct AACCoefficientsEncoder {
63 63
     void (*encode_main_pred)(struct AACEncContext *s, SingleChannelElement *sce);
64 64
     void (*adjust_common_prediction)(struct AACEncContext *s, ChannelElement *cpe);
65 65
     void (*apply_main_pred)(struct AACEncContext *s, SingleChannelElement *sce);
66
-    void (*apply_tns_filt)(SingleChannelElement *sce);
66
+    void (*apply_tns_filt)(struct AACEncContext *s, SingleChannelElement *sce);
67 67
     void (*set_special_band_scalefactors)(struct AACEncContext *s, SingleChannelElement *sce);
68 68
     void (*search_for_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce);
69 69
     void (*search_for_tns)(struct AACEncContext *s, SingleChannelElement *sce);
... ...
@@ -31,112 +31,80 @@
31 31
 #include "aacenc_utils.h"
32 32
 #include "aacenc_quantization.h"
33 33
 
34
-static inline int compress_coef(int *coefs, int num)
35
-{
36
-    int i, c = 0;
37
-    for (i = 0; i < num; i++)
38
-        c += coefs[i] < 4 || coefs[i] > 11;
39
-    return c == num;
40
-}
41
-
42 34
 /**
43 35
  * Encode TNS data.
44 36
  * Coefficient compression saves a single bit per coefficient.
45 37
  */
46 38
 void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce)
47 39
 {
48
-    int i, w, filt, coef_len, coef_compress;
40
+    uint8_t u_coef;
41
+    const uint8_t coef_res = TNS_Q_BITS == 4;
42
+    int i, w, filt, coef_len, coef_compress = 0;
49 43
     const int is8 = sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE;
44
+    TemporalNoiseShaping *tns = &sce->tns;
50 45
 
51 46
     if (!sce->tns.present)
52 47
         return;
53 48
 
54 49
     for (i = 0; i < sce->ics.num_windows; i++) {
55 50
         put_bits(&s->pb, 2 - is8, sce->tns.n_filt[i]);
56
-        if (sce->tns.n_filt[i]) {
57
-            put_bits(&s->pb, 1, 1);
58
-            for (filt = 0; filt < sce->tns.n_filt[i]; filt++) {
59
-                put_bits(&s->pb, 6 - 2 * is8, sce->tns.length[i][filt]);
60
-                put_bits(&s->pb, 5 - 2 * is8, sce->tns.order[i][filt]);
61
-                if (sce->tns.order[i][filt]) {
62
-                    coef_compress = compress_coef(sce->tns.coef_idx[i][filt],
63
-                                                  sce->tns.order[i][filt]);
64
-                    put_bits(&s->pb, 1, !!sce->tns.direction[i][filt]);
51
+        if (tns->n_filt[i]) {
52
+            put_bits(&s->pb, 1, coef_res);
53
+            for (filt = 0; filt < tns->n_filt[i]; filt++) {
54
+                put_bits(&s->pb, 6 - 2 * is8, tns->length[i][filt]);
55
+                put_bits(&s->pb, 5 - 2 * is8, tns->order[i][filt]);
56
+                if (tns->order[i][filt]) {
57
+                    put_bits(&s->pb, 1, !!tns->direction[i][filt]);
65 58
                     put_bits(&s->pb, 1, !!coef_compress);
66
-                    coef_len = 4 - coef_compress;
67
-                    for (w = 0; w < sce->tns.order[i][filt]; w++)
68
-                        put_bits(&s->pb, coef_len, sce->tns.coef_idx[i][filt][w]);
59
+                    coef_len = coef_res + 3 - coef_compress;
60
+                    for (w = 0; w < tns->order[i][filt]; w++) {
61
+                        u_coef = (tns->coef_idx[i][filt][w])&(~(~0<<coef_len));
62
+                        put_bits(&s->pb, coef_len, u_coef);
63
+                    }
69 64
                 }
70 65
             }
71 66
         }
72 67
     }
73 68
 }
74 69
 
75
-static void process_tns_coeffs(TemporalNoiseShaping *tns, double *coef_raw,
76
-                               int *order_p, int w, int filt)
70
+static int quantize_coefs(double *coef, int *idx, float *lpc, int order)
77 71
 {
78
-    int i, j, order = *order_p;
79
-    int *idx = tns->coef_idx[w][filt];
80
-    float *lpc = tns->coef[w][filt];
81
-    float temp[TNS_MAX_ORDER] = {0.0f}, out[TNS_MAX_ORDER] = {0.0f};
82
-
83
-    if (!order)
84
-        return;
85
-
86
-    /* Not what the specs say, but it's better */
72
+    int i;
73
+    uint8_t u_coef;
74
+    const float *quant_arr = tns_tmp2_map[TNS_Q_BITS == 4];
75
+    const double iqfac_p = ((1 << (TNS_Q_BITS-1)) - 0.5)/(M_PI/2.0);
76
+    const double iqfac_m = ((1 << (TNS_Q_BITS-1)) + 0.5)/(M_PI/2.0);
87 77
     for (i = 0; i < order; i++) {
88
-        idx[i] = quant_array_idx(coef_raw[i], tns_tmp2_map_0_4, 16);
89
-        lpc[i] = tns_tmp2_map_0_4[idx[i]];
90
-    }
91
-
92
-    /* Trim any coeff less than 0.1f from the end */
93
-    for (i = order-1; i > -1; i--) {
94
-        lpc[i] = (fabs(lpc[i]) > 0.1f) ? lpc[i] : 0.0f;
95
-        if (lpc[i] != 0.0 ) {
96
-            order = i;
97
-            break;
98
-        }
99
-    }
100
-    order = av_clip(order, 0, TNS_MAX_ORDER - 1);
101
-    *order_p = order;
102
-    if (!order)
103
-        return;
104
-
105
-    /* Step up procedure, convert to LPC coeffs */
106
-    out[0] = 1.0f;
107
-    for (i = 1; i <= order; i++) {
108
-        for (j = 1; j < i; j++) {
109
-            temp[j] = out[j] + lpc[i]*out[i-j];
110
-        }
111
-        for (j = 1; j <= i; j++) {
112
-            out[j] = temp[j];
113
-        }
114
-        out[i] = lpc[i-1];
78
+        idx[i] = ceilf(asin(coef[i])*((coef[i] >= 0) ? iqfac_p : iqfac_m));
79
+        u_coef = (idx[i])&(~(~0<<TNS_Q_BITS));
80
+        lpc[i] = quant_arr[u_coef];
115 81
     }
116
-    memcpy(lpc, out, TNS_MAX_ORDER*sizeof(float));
82
+    return order;
117 83
 }
118 84
 
119 85
 /* Apply TNS filter */
120
-void ff_aac_apply_tns(SingleChannelElement *sce)
86
+void ff_aac_apply_tns(AACEncContext *s, SingleChannelElement *sce)
121 87
 {
122
-    float *coef = sce->pcoeffs;
123 88
     TemporalNoiseShaping *tns = &sce->tns;
124
-    int w, filt, m, i;
125
-    int bottom, top, order, start, end, size, inc;
126
-    float *lpc, tmp[TNS_MAX_ORDER+1];
89
+    IndividualChannelStream *ics = &sce->ics;
90
+    int w, filt, m, i, top, order, bottom, start, end, size, inc;
91
+    const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
92
+    float lpc[TNS_MAX_ORDER];
127 93
 
128
-    for (w = 0; w < sce->ics.num_windows; w++) {
129
-        bottom = sce->ics.num_swb;
94
+    for (w = 0; w < ics->num_windows; w++) {
95
+        bottom = ics->num_swb;
130 96
         for (filt = 0; filt < tns->n_filt[w]; filt++) {
131 97
             top    = bottom;
132 98
             bottom = FFMAX(0, top - tns->length[w][filt]);
133 99
             order  = tns->order[w][filt];
134
-            lpc    = tns->coef[w][filt];
135
-            if (!order)
100
+            if (order == 0)
136 101
                 continue;
137 102
 
138
-            start = sce->ics.swb_offset[bottom];
139
-            end   = sce->ics.swb_offset[top];
103
+            // tns_decode_coef
104
+            compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);
105
+
106
+            start = ics->swb_offset[FFMIN(bottom, mmm)];
107
+            end   = ics->swb_offset[FFMIN(   top, mmm)];
140 108
             if ((size = end - start) <= 0)
141 109
                 continue;
142 110
             if (tns->direction[w][filt]) {
... ...
@@ -147,21 +115,10 @@ void ff_aac_apply_tns(SingleChannelElement *sce)
147 147
             }
148 148
             start += w * 128;
149 149
 
150
-            if (!sce->ics.ltp.present) {
151
-                // ar filter
152
-                for (m = 0; m < size; m++, start += inc)
153
-                    for (i = 1; i <= FFMIN(m, order); i++)
154
-                        coef[start] += coef[start - i * inc]*lpc[i - 1];
155
-            } else {
156
-                // ma filter
157
-                for (m = 0; m < size; m++, start += inc) {
158
-                    tmp[0] = coef[start];
159
-                    for (i = 1; i <= FFMIN(m, order); i++)
160
-                        coef[start] += tmp[i]*lpc[i - 1];
161
-                    for (i = order; i > 0; i--)
162
-                        tmp[i] = tmp[i - 1];
163
-                }
164
-            }
150
+            // ar filter
151
+            for (m = 0; m < size; m++, start += inc)
152
+                for (i = 1; i <= FFMIN(m, order); i++)
153
+                    sce->coeffs[start] += lpc[i-1]*sce->pcoeffs[start - i*inc];
165 154
         }
166 155
     }
167 156
 }
... ...
@@ -169,57 +126,53 @@ void ff_aac_apply_tns(SingleChannelElement *sce)
169 169
 void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce)
170 170
 {
171 171
     TemporalNoiseShaping *tns = &sce->tns;
172
-    int w, g, w2, prev_end_sfb = 0, count = 0;
172
+    int w, w2, g, count = 0;
173
+    const int mmm = FFMIN(sce->ics.tns_max_bands, sce->ics.max_sfb);
173 174
     const int is8 = sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE;
174
-    const int tns_max_order = is8 ? 7 : s->profile == FF_PROFILE_AAC_LOW ? 12 : TNS_MAX_ORDER;
175
+    int order = is8 ? 7 : s->profile == FF_PROFILE_AAC_LOW ? 12 : TNS_MAX_ORDER;
176
+
177
+    int sfb_start = av_clip(tns_min_sfb[is8][s->samplerate_index], 0, mmm);
178
+    int sfb_end   = av_clip(sce->ics.num_swb, 0, mmm);
175 179
 
176 180
     for (w = 0; w < sce->ics.num_windows; w++) {
177
-        int order = 0, filters = 1;
178
-        int sfb_start = 0, sfb_len = 0;
179
-        int coef_start = 0, coef_len = 0;
180
-        float energy = 0.0f, threshold = 0.0f;
181
-        double coefs[MAX_LPC_ORDER][MAX_LPC_ORDER] = {{0}};
181
+        float en_low = 0.0f, en_high = 0.0f, threshold = 0.0f, spread = 0.0f;
182
+        double gain = 0.0f, coefs[MAX_LPC_ORDER] = {0};
183
+
184
+        int coef_start = w*sce->ics.num_swb + sce->ics.swb_offset[sfb_start];
185
+        int coef_len = sce->ics.swb_offset[sfb_end] - sce->ics.swb_offset[sfb_start];
186
+
182 187
         for (g = 0;  g < sce->ics.num_swb; g++) {
183
-            if (!sfb_start && w*16+g > TNS_LOW_LIMIT && w*16+g > prev_end_sfb) {
184
-                sfb_start = w*16+g;
185
-                coef_start =  sce->ics.swb_offset[sfb_start];
186
-            }
187
-            if (sfb_start) {
188
-                for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
189
-                    FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
190
-                    if (!sfb_len && band->energy < band->threshold*1.3f) {
191
-                        sfb_len = (w+w2)*16+g - sfb_start;
192
-                        prev_end_sfb = sfb_start + sfb_len;
193
-                        coef_len = sce->ics.swb_offset[sfb_start + sfb_len] - coef_start;
194
-                        break;
195
-                    }
196
-                    energy += band->energy;
197
-                    threshold += band->threshold;
198
-                }
199
-                if (!sfb_len) {
200
-                    sfb_len = (w+1)*16+g - sfb_start - 1;
201
-                    coef_len = sce->ics.swb_offset[sfb_start + sfb_len] - coef_start;
202
-                }
188
+            if (w*16+g < sfb_start || w*16+g > sfb_end)
189
+                continue;
190
+            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
191
+                FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
192
+                if ((w+w2)*16+g > sfb_start + ((sfb_end - sfb_start)/2))
193
+                    en_high += band->energy;
194
+                else
195
+                    en_low  += band->energy;
196
+                threshold += band->threshold;
197
+                spread += band->spread;
203 198
             }
204 199
         }
205 200
 
206
-        if (sfb_len <= 0 || coef_len <= 0)
201
+        if (coef_len <= 0 || (sfb_end - sfb_start) <= 0)
207 202
             continue;
208
-        if (coef_start + coef_len >= 1024)
209
-            coef_len = 1024 - coef_start;
210 203
 
211 204
         /* LPC */
212
-        order = ff_lpc_calc_levinson(&s->lpc, &sce->coeffs[coef_start], coef_len,
213
-                                     coefs, 0, tns_max_order, ORDER_METHOD_LOG);
205
+        gain = ff_lpc_calc_ref_coefs_f(&s->lpc, &sce->coeffs[coef_start],
206
+                                       coef_len, order, coefs);
207
+
208
+        gain *= s->lambda/110.0f;
214 209
 
215
-        if (energy > threshold) {
216
-            int direction = 0;
217
-            tns->n_filt[w] = filters++;
210
+        if (gain > TNS_GAIN_THRESHOLD_LOW && gain*0 < TNS_GAIN_THRESHOLD_HIGH &&
211
+            (en_low+en_high) > TNS_GAIN_THRESHOLD_LOW*threshold &&
212
+            spread > TNS_SPREAD_THRESHOLD) {
213
+            tns->n_filt[w] = 1;
218 214
             for (g = 0; g < tns->n_filt[w]; g++) {
219
-                process_tns_coeffs(tns, coefs[order], &order, w, g);
220
-                tns->order[w][g]     = order;
221
-                tns->length[w][g]    = sfb_len;
222
-                tns->direction[w][g] = direction;
215
+                tns->length[w][g] = sfb_end - sfb_start;
216
+                tns->direction[w][g] = en_low < en_high && TNS_DIRECTION_VARY;
217
+                tns->order[w][g] = quantize_coefs(coefs, tns->coef_idx[w][g],
218
+                                                  tns->coef[w][g], order);
223 219
             }
224 220
             count++;
225 221
         }
... ...
@@ -30,11 +30,21 @@
30 30
 
31 31
 #include "aacenc.h"
32 32
 
33
-/** Lower limit of TNS in SFBs **/
34
-#define TNS_LOW_LIMIT 24
33
+/* Could be set to 3 to save an additional bit at the cost of a little quality */
34
+#define TNS_Q_BITS 4
35
+
36
+/* TNS will only be used if the LPC gain is within these margins */
37
+#define TNS_GAIN_THRESHOLD_LOW  1.395f
38
+#define TNS_GAIN_THRESHOLD_HIGH 11.19f
39
+
40
+/* Do not use TNS if the psy band spread is below this value */
41
+#define TNS_SPREAD_THRESHOLD 20.081512f
42
+
43
+/* Allows reversing the filter direction if the band energy is uneven */
44
+#define TNS_DIRECTION_VARY 1
35 45
 
36 46
 void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce);
37
-void ff_aac_apply_tns(SingleChannelElement *sce);
47
+void ff_aac_apply_tns(AACEncContext *s, SingleChannelElement *sce);
38 48
 void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce);
39 49
 
40 50
 #endif /* AVCODEC_AACENC_TNS_H */