Browse code

AAC Encoder: clipping avoidance

Prevent clipping caused by quantization noise from producing audible
artifacts. This is done by detecting near-clipping signals, attenuating
them a little, and quantizing escape-encoded bands (usually the
loudest) with rounding towards zero instead of to nearest, which tends
to decrease overall energy and thus clipping.

Currently the FATE tests measure numerical error, so this change makes
tests that use asynth (which are near clipping) report a higher error,
not a lower one, because of the window attenuation. Yet they sound
better, not worse (the improvement is subtle there, but on other
samples it is not subtle at all). Only measuring a psychoacoustically
weighted error would make for a representative test, so that is left
for a future patch.

Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>

Claudio Freire authored on 2015/07/21 10:53:24
Showing 7 changed files
... ...
@@ -50,6 +50,8 @@
50 50
 #define TNS_MAX_ORDER 20
51 51
 #define MAX_LTP_LONG_SFB 40
52 52
 
53
+#define CLIP_AVOIDANCE_FACTOR 0.95f
54
+
53 55
 enum RawDataBlockType {
54 56
     TYPE_SCE,
55 57
     TYPE_CPE,
... ...
@@ -180,6 +182,8 @@ typedef struct IndividualChannelStream {
180 180
     int predictor_initialized;
181 181
     int predictor_reset_group;
182 182
     uint8_t prediction_used[41];
183
+    uint8_t window_clipping[8]; ///< set if a certain window is near clipping
184
+    float clip_avoidance_factor; ///< set if any window is near clipping to the necessary attenuation factor to avoid it
183 185
 } IndividualChannelStream;
184 186
 
185 187
 /**
... ...
@@ -79,6 +79,9 @@ static const uint8_t * const run_value_bits[2] = {
79 79
     run_value_bits_long, run_value_bits_short
80 80
 };
81 81
 
82
+#define ROUND_STANDARD 0.4054f
83
+#define ROUND_TO_ZERO 0.1054f
84
+
82 85
 /** Map to convert values from BandCodingPath index to a codebook index **/
83 86
 static const uint8_t aac_cb_out_map[CB_TOT_ALL]  = {0,1,2,3,4,5,6,7,8,9,10,11,13,14,15};
84 87
 /** Inverse map to convert from codebooks to BandCodingPath indices **/
... ...
@@ -89,20 +92,20 @@ static const uint8_t aac_cb_in_map[CB_TOT_ALL+1] = {0,1,2,3,4,5,6,7,8,9,10,11,0,
89 89
  * @return absolute value of the quantized coefficient
90 90
  * @see 3GPP TS26.403 5.6.2 "Scalefactor determination"
91 91
  */
92
-static av_always_inline int quant(float coef, const float Q)
92
+static av_always_inline int quant(float coef, const float Q, const float rounding)
93 93
 {
94 94
     float a = coef * Q;
95
-    return sqrtf(a * sqrtf(a)) + 0.4054;
95
+    return sqrtf(a * sqrtf(a)) + rounding;
96 96
 }
97 97
 
98 98
 static void quantize_bands(int *out, const float *in, const float *scaled,
99
-                           int size, float Q34, int is_signed, int maxval)
99
+                           int size, float Q34, int is_signed, int maxval, const float rounding)
100 100
 {
101 101
     int i;
102 102
     double qc;
103 103
     for (i = 0; i < size; i++) {
104 104
         qc = scaled[i] * Q34;
105
-        out[i] = (int)FFMIN(qc + 0.4054, (double)maxval);
105
+        out[i] = (int)FFMIN(qc + rounding, (double)maxval);
106 106
         if (is_signed && in[i] < 0.0f) {
107 107
             out[i] = -out[i];
108 108
         }
... ...
@@ -134,7 +137,8 @@ static av_always_inline float quantize_and_encode_band_cost_template(
134 134
                                 const float *scaled, int size, int scale_idx,
135 135
                                 int cb, const float lambda, const float uplim,
136 136
                                 int *bits, int BT_ZERO, int BT_UNSIGNED,
137
-                                int BT_PAIR, int BT_ESC, int BT_NOISE, int BT_STEREO)
137
+                                int BT_PAIR, int BT_ESC, int BT_NOISE, int BT_STEREO,
138
+                                const float ROUNDING)
138 139
 {
139 140
     const int q_idx = POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512;
140 141
     const float Q   = ff_aac_pow2sf_tab [q_idx];
... ...
@@ -158,7 +162,7 @@ static av_always_inline float quantize_and_encode_band_cost_template(
158 158
         abs_pow34_v(s->scoefs, in, size);
159 159
         scaled = s->scoefs;
160 160
     }
161
-    quantize_bands(s->qcoefs, in, scaled, size, Q34, !BT_UNSIGNED, aac_cb_maxval[cb]);
161
+    quantize_bands(s->qcoefs, in, scaled, size, Q34, !BT_UNSIGNED, aac_cb_maxval[cb], ROUNDING);
162 162
     if (BT_UNSIGNED) {
163 163
         off = 0;
164 164
     } else {
... ...
@@ -185,7 +189,7 @@ static av_always_inline float quantize_and_encode_band_cost_template(
185 185
                         di = t - CLIPPED_ESCAPE;
186 186
                         curbits += 21;
187 187
                     } else {
188
-                        int c = av_clip_uintp2(quant(t, Q), 13);
188
+                        int c = av_clip_uintp2(quant(t, Q, ROUNDING), 13);
189 189
                         di = t - c*cbrtf(c)*IQ;
190 190
                         curbits += av_log2(c)*2 - 4 + 1;
191 191
                     }
... ...
@@ -215,7 +219,7 @@ static av_always_inline float quantize_and_encode_band_cost_template(
215 215
             if (BT_ESC) {
216 216
                 for (j = 0; j < 2; j++) {
217 217
                     if (ff_aac_codebook_vectors[cb-1][curidx*2+j] == 64.0f) {
218
-                        int coef = av_clip_uintp2(quant(fabsf(in[i+j]), Q), 13);
218
+                        int coef = av_clip_uintp2(quant(fabsf(in[i+j]), Q, ROUNDING), 13);
219 219
                         int len = av_log2(coef);
220 220
 
221 221
                         put_bits(pb, len - 4 + 1, (1 << (len - 4 + 1)) - 2);
... ...
@@ -240,7 +244,7 @@ static float quantize_and_encode_band_cost_NONE(struct AACEncContext *s, PutBitC
240 240
     return 0.0f;
241 241
 }
242 242
 
243
-#define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO) \
243
+#define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO, ROUNDING) \
244 244
 static float quantize_and_encode_band_cost_ ## NAME(                                         \
245 245
                                 struct AACEncContext *s,                                     \
246 246
                                 PutBitContext *pb, const float *in,                          \
... ...
@@ -250,17 +254,19 @@ static float quantize_and_encode_band_cost_ ## NAME(
250 250
     return quantize_and_encode_band_cost_template(                                           \
251 251
                                 s, pb, in, scaled, size, scale_idx,                          \
252 252
                                 BT_ESC ? ESC_BT : cb, lambda, uplim, bits,                   \
253
-                                BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO); \
253
+                                BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO,  \
254
+                                ROUNDING);                                                   \
254 255
 }
255 256
 
256
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ZERO,  1, 0, 0, 0, 0, 0)
257
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SQUAD, 0, 0, 0, 0, 0, 0)
258
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UQUAD, 0, 1, 0, 0, 0, 0)
259
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SPAIR, 0, 0, 1, 0, 0, 0)
260
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UPAIR, 0, 1, 1, 0, 0, 0)
261
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC,   0, 1, 1, 1, 0, 0)
262
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NOISE, 0, 0, 0, 0, 1, 0)
263
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(STEREO,0, 0, 0, 0, 0, 1)
257
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ZERO,  1, 0, 0, 0, 0, 0, ROUND_STANDARD)
258
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SQUAD, 0, 0, 0, 0, 0, 0, ROUND_STANDARD)
259
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UQUAD, 0, 1, 0, 0, 0, 0, ROUND_STANDARD)
260
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SPAIR, 0, 0, 1, 0, 0, 0, ROUND_STANDARD)
261
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UPAIR, 0, 1, 1, 0, 0, 0, ROUND_STANDARD)
262
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC,   0, 1, 1, 1, 0, 0, ROUND_STANDARD)
263
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC_RTZ, 0, 1, 1, 1, 0, 0, ROUND_TO_ZERO)
264
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NOISE, 0, 0, 0, 0, 1, 0, ROUND_STANDARD)
265
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(STEREO,0, 0, 0, 0, 0, 1, ROUND_STANDARD)
264 266
 
265 267
 static float (*const quantize_and_encode_band_cost_arr[])(
266 268
                                 struct AACEncContext *s,
... ...
@@ -286,28 +292,52 @@ static float (*const quantize_and_encode_band_cost_arr[])(
286 286
     quantize_and_encode_band_cost_STEREO,
287 287
 };
288 288
 
289
+static float (*const quantize_and_encode_band_cost_rtz_arr[])(
290
+                                struct AACEncContext *s,
291
+                                PutBitContext *pb, const float *in,
292
+                                const float *scaled, int size, int scale_idx,
293
+                                int cb, const float lambda, const float uplim,
294
+                                int *bits) = {
295
+    quantize_and_encode_band_cost_ZERO,
296
+    quantize_and_encode_band_cost_SQUAD,
297
+    quantize_and_encode_band_cost_SQUAD,
298
+    quantize_and_encode_band_cost_UQUAD,
299
+    quantize_and_encode_band_cost_UQUAD,
300
+    quantize_and_encode_band_cost_SPAIR,
301
+    quantize_and_encode_band_cost_SPAIR,
302
+    quantize_and_encode_band_cost_UPAIR,
303
+    quantize_and_encode_band_cost_UPAIR,
304
+    quantize_and_encode_band_cost_UPAIR,
305
+    quantize_and_encode_band_cost_UPAIR,
306
+    quantize_and_encode_band_cost_ESC_RTZ,
307
+    quantize_and_encode_band_cost_NONE,     /* CB 12 doesn't exist */
308
+    quantize_and_encode_band_cost_NOISE,
309
+    quantize_and_encode_band_cost_STEREO,
310
+    quantize_and_encode_band_cost_STEREO,
311
+};
312
+
289 313
 #define quantize_and_encode_band_cost(                                  \
290 314
                                 s, pb, in, scaled, size, scale_idx, cb, \
291
-                                lambda, uplim, bits)                    \
292
-    quantize_and_encode_band_cost_arr[cb](                              \
315
+                                lambda, uplim, bits, rtz)               \
316
+    ((rtz) ? quantize_and_encode_band_cost_rtz_arr : quantize_and_encode_band_cost_arr)[cb]( \
293 317
                                 s, pb, in, scaled, size, scale_idx, cb, \
294 318
                                 lambda, uplim, bits)
295 319
 
296 320
 static float quantize_band_cost(struct AACEncContext *s, const float *in,
297 321
                                 const float *scaled, int size, int scale_idx,
298 322
                                 int cb, const float lambda, const float uplim,
299
-                                int *bits)
323
+                                int *bits, int rtz)
300 324
 {
301 325
     return quantize_and_encode_band_cost(s, NULL, in, scaled, size, scale_idx,
302
-                                         cb, lambda, uplim, bits);
326
+                                         cb, lambda, uplim, bits, rtz);
303 327
 }
304 328
 
305 329
 static void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb,
306 330
                                      const float *in, int size, int scale_idx,
307
-                                     int cb, const float lambda)
331
+                                     int cb, const float lambda, int rtz)
308 332
 {
309 333
     quantize_and_encode_band_cost(s, pb, in, NULL, size, scale_idx, cb, lambda,
310
-                                  INFINITY, NULL);
334
+                                  INFINITY, NULL, rtz);
311 335
 }
312 336
 
313 337
 static float find_max_val(int group_len, int swb_size, const float *scaled) {
... ...
@@ -397,7 +427,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce
397 397
                     rd += quantize_band_cost(s, sce->coeffs + start + w*128,
398 398
                                              s->scoefs + start + w*128, size,
399 399
                                              sce->sf_idx[(win+w)*16+swb], aac_cb_out_map[cb],
400
-                                             lambda / band->threshold, INFINITY, NULL);
400
+                                             lambda / band->threshold, INFINITY, NULL, 0);
401 401
                 }
402 402
                 cost_stay_here = path[swb][cb].cost + rd;
403 403
                 cost_get_here  = minrd              + rd + run_bits + 4;
... ...
@@ -527,9 +557,9 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce,
527 527
                 for (w = 0; w < group_len; w++) {
528 528
                     bits += quantize_band_cost(s, sce->coeffs + start + w*128,
529 529
                                                s->scoefs + start + w*128, size,
530
-                                               sce->sf_idx[(win+w)*16+swb],
530
+                                               sce->sf_idx[win*16+swb],
531 531
                                                aac_cb_out_map[cb],
532
-                                               0, INFINITY, NULL);
532
+                                               0, INFINITY, NULL, 0);
533 533
                 }
534 534
                 cost_stay_here = path[swb][cb].cost + bits;
535 535
                 cost_get_here  = minbits            + bits + run_bits + 4;
... ...
@@ -749,7 +779,7 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
749 749
                     for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
750 750
                         FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
751 751
                         dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g],
752
-                                                   q + q0, cb, lambda / band->threshold, INFINITY, NULL);
752
+                                                   q + q0, cb, lambda / band->threshold, INFINITY, NULL, 0);
753 753
                     }
754 754
                     minrd = FFMIN(minrd, dist);
755 755
 
... ...
@@ -895,7 +925,8 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx,
895 895
                                                    cb,
896 896
                                                    1.0f,
897 897
                                                    INFINITY,
898
-                                                   &b);
898
+                                                   &b,
899
+                                                   0);
899 900
                         bits += b;
900 901
                     }
901 902
                     dists[w*16+g] = dist - bits;
... ...
@@ -1061,11 +1092,12 @@ static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s,
1061 1061
                                                ESC_BT,
1062 1062
                                                lambda,
1063 1063
                                                INFINITY,
1064
-                                               &b);
1064
+                                               &b,
1065
+                                               0);
1065 1066
                     dist -= b;
1066 1067
                 }
1067 1068
                 dist *= 1.0f / 512.0f / lambda;
1068
-                quant_max = quant(maxq[w*16+g], ff_aac_pow2sf_tab[POW_SF2_ZERO - scf + SCALE_ONE_POS - SCALE_DIV_512]);
1069
+                quant_max = quant(maxq[w*16+g], ff_aac_pow2sf_tab[POW_SF2_ZERO - scf + SCALE_ONE_POS - SCALE_DIV_512], ROUND_STANDARD);
1069 1070
                 if (quant_max >= 8191) { // too much, return to the previous quantizer
1070 1071
                     sce->sf_idx[w*16+g] = prev_scf;
1071 1072
                     break;
... ...
@@ -1242,19 +1274,19 @@ static void search_for_is(AACEncContext *s, AVCodecContext *avctx, ChannelElemen
1242 1242
                                                 sce0->ics.swb_sizes[g],
1243 1243
                                                 sce0->sf_idx[(w+w2)*16+g],
1244 1244
                                                 sce0->band_type[(w+w2)*16+g],
1245
-                                                lambda / band0->threshold, INFINITY, NULL);
1245
+                                                lambda / band0->threshold, INFINITY, NULL, 0);
1246 1246
                     dist1 += quantize_band_cost(s, sce1->coeffs + start + (w+w2)*128,
1247 1247
                                                 R34,
1248 1248
                                                 sce1->ics.swb_sizes[g],
1249 1249
                                                 sce1->sf_idx[(w+w2)*16+g],
1250 1250
                                                 sce1->band_type[(w+w2)*16+g],
1251
-                                                lambda / band1->threshold, INFINITY, NULL);
1251
+                                                lambda / band1->threshold, INFINITY, NULL, 0);
1252 1252
                     dist2 += quantize_band_cost(s, IS,
1253 1253
                                                 I34,
1254 1254
                                                 sce0->ics.swb_sizes[g],
1255 1255
                                                 is_sf_idx,
1256 1256
                                                 is_band_type,
1257
-                                                lambda / minthr, INFINITY, NULL);
1257
+                                                lambda / minthr, INFINITY, NULL, 0);
1258 1258
                     for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
1259 1259
                         dist_spec_err += (L34[i] - I34[i])*(L34[i] - I34[i]);
1260 1260
                         dist_spec_err += (R34[i] - I34[i]*e01_34)*(R34[i] - I34[i]*e01_34);
... ...
@@ -1315,25 +1347,25 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe,
1315 1315
                                                 sce0->ics.swb_sizes[g],
1316 1316
                                                 sce0->sf_idx[(w+w2)*16+g],
1317 1317
                                                 sce0->band_type[(w+w2)*16+g],
1318
-                                                lambda / band0->threshold, INFINITY, NULL);
1318
+                                                lambda / band0->threshold, INFINITY, NULL, 0);
1319 1319
                     dist1 += quantize_band_cost(s, sce1->coeffs + start + (w+w2)*128,
1320 1320
                                                 R34,
1321 1321
                                                 sce1->ics.swb_sizes[g],
1322 1322
                                                 sce1->sf_idx[(w+w2)*16+g],
1323 1323
                                                 sce1->band_type[(w+w2)*16+g],
1324
-                                                lambda / band1->threshold, INFINITY, NULL);
1324
+                                                lambda / band1->threshold, INFINITY, NULL, 0);
1325 1325
                     dist2 += quantize_band_cost(s, M,
1326 1326
                                                 M34,
1327 1327
                                                 sce0->ics.swb_sizes[g],
1328 1328
                                                 sce0->sf_idx[(w+w2)*16+g],
1329 1329
                                                 sce0->band_type[(w+w2)*16+g],
1330
-                                                lambda / maxthr, INFINITY, NULL);
1330
+                                                lambda / maxthr, INFINITY, NULL, 0);
1331 1331
                     dist2 += quantize_band_cost(s, S,
1332 1332
                                                 S34,
1333 1333
                                                 sce1->ics.swb_sizes[g],
1334 1334
                                                 sce1->sf_idx[(w+w2)*16+g],
1335 1335
                                                 sce1->band_type[(w+w2)*16+g],
1336
-                                                lambda / minthr, INFINITY, NULL);
1336
+                                                lambda / minthr, INFINITY, NULL, 0);
1337 1337
                 }
1338 1338
                 cpe->ms_mask[w*16+g] = dist2 < dist1;
1339 1339
             }
... ...
@@ -472,13 +472,33 @@ static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
472 472
                                                    sce->ics.swb_sizes[i],
473 473
                                                    sce->sf_idx[w*16 + i],
474 474
                                                    sce->band_type[w*16 + i],
475
-                                                   s->lambda);
475
+                                                   s->lambda, sce->ics.window_clipping[w]);
476 476
             start += sce->ics.swb_sizes[i];
477 477
         }
478 478
     }
479 479
 }
480 480
 
481 481
 /**
482
+ * Downscale spectral coefficients for near-clipping windows to avoid artifacts
483
+ */
484
+static void avoid_clipping(AACEncContext *s, SingleChannelElement *sce)
485
+{
486
+    int start, i, j, w;
487
+
488
+    if (sce->ics.clip_avoidance_factor < 1.0f) {
489
+        for (w = 0; w < sce->ics.num_windows; w++) {
490
+            start = 0;
491
+            for (i = 0; i < sce->ics.max_sfb; i++) {
492
+                float *swb_coeffs = sce->coeffs + start + w*128;
493
+                for (j = 0; j < sce->ics.swb_sizes[i]; j++)
494
+                    swb_coeffs[j] *= sce->ics.clip_avoidance_factor;
495
+                start += sce->ics.swb_sizes[i];
496
+            }
497
+        }
498
+    }
499
+}
500
+
501
+/**
482 502
  * Encode one channel of audio data.
483 503
  */
484 504
 static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
... ...
@@ -578,6 +598,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
578 578
         for (ch = 0; ch < chans; ch++) {
579 579
             IndividualChannelStream *ics = &cpe->ch[ch].ics;
580 580
             int cur_channel = start_ch + ch;
581
+            float clip_avoidance_factor;
581 582
             overlap  = &samples[cur_channel][0];
582 583
             samples2 = overlap + 1024;
583 584
             la       = samples2 + (448+64);
... ...
@@ -605,14 +626,29 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
605 605
             ics->num_windows        = wi[ch].num_windows;
606 606
             ics->swb_sizes          = s->psy.bands    [ics->num_windows == 8];
607 607
             ics->num_swb            = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
608
+            clip_avoidance_factor = 0.0f;
608 609
             for (w = 0; w < ics->num_windows; w++)
609 610
                 ics->group_len[w] = wi[ch].grouping[w];
611
+            for (w = 0; w < ics->num_windows; w++) {
612
+                if (wi[ch].clipping[w] > CLIP_AVOIDANCE_FACTOR) {
613
+                    ics->window_clipping[w] = 1;
614
+                    clip_avoidance_factor = FFMAX(clip_avoidance_factor, wi[ch].clipping[w]);
615
+                } else {
616
+                    ics->window_clipping[w] = 0;
617
+                }
618
+            }
619
+            if (clip_avoidance_factor > CLIP_AVOIDANCE_FACTOR) {
620
+                ics->clip_avoidance_factor = CLIP_AVOIDANCE_FACTOR / clip_avoidance_factor;
621
+            } else {
622
+                ics->clip_avoidance_factor = 1.0f;
623
+            }
610 624
 
611 625
             apply_window_and_mdct(s, &cpe->ch[ch], overlap);
612 626
             if (isnan(cpe->ch->coeffs[0])) {
613 627
                 av_log(avctx, AV_LOG_ERROR, "Input contains NaN\n");
614 628
                 return AVERROR(EINVAL);
615 629
             }
630
+            avoid_clipping(s, &cpe->ch[ch]);
616 631
         }
617 632
         start_ch += chans;
618 633
     }
... ...
@@ -54,7 +54,7 @@ typedef struct AACCoefficientsEncoder {
54 54
     void (*encode_window_bands_info)(struct AACEncContext *s, SingleChannelElement *sce,
55 55
                                      int win, int group_len, const float lambda);
56 56
     void (*quantize_and_encode_band)(struct AACEncContext *s, PutBitContext *pb, const float *in, int size,
57
-                                     int scale_idx, int cb, const float lambda);
57
+                                     int scale_idx, int cb, const float lambda, int rtz);
58 58
     void (*set_special_band_scalefactors)(struct AACEncContext *s, SingleChannelElement *sce);
59 59
     void (*search_for_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce, const float lambda);
60 60
     void (*search_for_ms)(struct AACEncContext *s, ChannelElement *cpe, const float lambda);
... ...
@@ -837,6 +837,7 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio,
837 837
     int grouping     = 0;
838 838
     int uselongblock = 1;
839 839
     int attacks[AAC_NUM_BLOCKS_SHORT + 1] = { 0 };
840
+    float clippings[AAC_NUM_BLOCKS_SHORT];
840 841
     int i;
841 842
     FFPsyWindowInfo wi = { { 0 } };
842 843
 
... ...
@@ -926,14 +927,35 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio,
926 926
 
927 927
     lame_apply_block_type(pch, &wi, uselongblock);
928 928
 
929
+    /* Calculate input sample maximums and evaluate clipping risk */
930
+    if (audio) {
931
+        for (i = 0; i < AAC_NUM_BLOCKS_SHORT; i++) {
932
+            const float *wbuf = audio + i * AAC_BLOCK_SIZE_SHORT;
933
+            float max = 0;
934
+            int j;
935
+            for (j = 0; j < AAC_BLOCK_SIZE_SHORT; j++)
936
+                max = FFMAX(max, fabsf(wbuf[j]));
937
+            clippings[i] = max;
938
+        }
939
+    } else {
940
+        for (i = 0; i < 8; i++)
941
+            clippings[i] = 0;
942
+    }
943
+
929 944
     wi.window_type[1] = prev_type;
930 945
     if (wi.window_type[0] != EIGHT_SHORT_SEQUENCE) {
946
+        float clipping = 0.0f;
947
+
931 948
         wi.num_windows  = 1;
932 949
         wi.grouping[0]  = 1;
933 950
         if (wi.window_type[0] == LONG_START_SEQUENCE)
934 951
             wi.window_shape = 0;
935 952
         else
936 953
             wi.window_shape = 1;
954
+
955
+        for (i = 0; i < 8; i++)
956
+            clipping = FFMAX(clipping, clippings[i]);
957
+        wi.clipping[0] = clipping;
937 958
     } else {
938 959
         int lastgrp = 0;
939 960
 
... ...
@@ -944,6 +966,14 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio,
944 944
                 lastgrp = i;
945 945
             wi.grouping[lastgrp]++;
946 946
         }
947
+
948
+        for (i = 0; i < 8; i += wi.grouping[i]) {
949
+            int w;
950
+            float clipping = 0.0f;
951
+            for (w = 0; w < wi.grouping[i] && !clipping; w++)
952
+                clipping = FFMAX(clipping, clippings[i+w]);
953
+            wi.clipping[i] = clipping;
954
+        }
947 955
     }
948 956
 
949 957
     /* Determine grouping, based on the location of the first attack, and save for
... ...
@@ -66,6 +66,7 @@ typedef struct FFPsyWindowInfo {
66 66
     int window_shape;                 ///< window shape (sine/KBD/whatever)
67 67
     int num_windows;                  ///< number of windows in a frame
68 68
     int grouping[8];                  ///< window grouping (for e.g. AAC)
69
+    float clipping[8];                ///< maximum absolute normalized intensity in the given window for clip avoidance
69 70
     int *window_sizes;                ///< sequence of window sizes inside one frame (for eg. WMA)
70 71
 } FFPsyWindowInfo;
71 72
 
... ...
@@ -146,7 +146,7 @@ fate-aac-aref-encode: CMD = enc_dec_pcm adts wav s16le $(REF) -strict -2 -c:a aa
146 146
 fate-aac-aref-encode: CMP = stddev
147 147
 fate-aac-aref-encode: REF = ./tests/data/asynth-44100-2.wav
148 148
 fate-aac-aref-encode: CMP_SHIFT = -4096
149
-fate-aac-aref-encode: CMP_TARGET = 434
149
+fate-aac-aref-encode: CMP_TARGET = 594
150 150
 fate-aac-aref-encode: SIZE_TOLERANCE = 2464
151 151
 fate-aac-aref-encode: FUZZ = 5
152 152
 
... ...
@@ -155,7 +155,7 @@ fate-aac-ln-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-ref
155 155
 fate-aac-ln-encode: CMP = stddev
156 156
 fate-aac-ln-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
157 157
 fate-aac-ln-encode: CMP_SHIFT = -4096
158
-fate-aac-ln-encode: CMP_TARGET = 65
158
+fate-aac-ln-encode: CMP_TARGET = 68
159 159
 fate-aac-ln-encode: SIZE_TOLERANCE = 3560
160 160
 
161 161
 FATE_AAC_LATM += fate-aac-latm_000000001180bc60