GitList

Browse code

AAC Encoder: clipping avoidance

Prevent clipping caused by quantization noise from producing audible
artifacts. Near-clipping signals are detected and attenuated a little,
and their escape-encoded bands (usually the loudest) are quantized
rounding towards zero instead of to nearest, which tends to decrease
overall energy and thus clipping.

The FATE tests currently measure plain numerical error, so with this
change the tests that use asynth (which is near clipping) report a
higher error rather than a lower one, because of the window attenuation.
Yet the output sounds better, not worse (the difference is subtle on
asynth, but not subtle at all on other samples). Only a
psychoacoustically weighted error measure would make for a
representative test, so that is left for a future patch.

Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>

Claudio Freire authored on 2015/07/21 10:53:24
Showing 7 changed files

libavcodec/aac.h index d62455d..3e3e479 100644
libavcodec/aaccoder.c index 17b14d6..eb58342 100644
libavcodec/aacenc.c index a3c31de..c3c72af 100644
libavcodec/aacenc.h index 4210455..1f05aab 100644
libavcodec/aacpsy.c index b16f6b9..a5474b9 100644
libavcodec/psymodel.h index 2e3ab91..e9be1f6 100644
tests/fate/aac.mak index 7ebec45..9b08959 100644

libavcodec/aac.h

History View file @ 59216e0

@@ -50,6 +50,8 @@
                      #define TNS_MAX_ORDER 20
                      #define MAX_LTP_LONG_SFB 40
                     +#define CLIP_AVOIDANCE_FACTOR 0.95f
+                    +
                      enum RawDataBlockType {
                          TYPE_SCE,
                          TYPE_CPE,
@@ -180,6 +182,8 @@ typedef struct IndividualChannelStream {
                          int predictor_initialized;
                          int predictor_reset_group;
                          uint8_t prediction_used[41];
                     +    uint8_t window_clipping[8]; ///< set if a certain window is near clipping
                     +    float clip_avoidance_factor; ///< set if any window is near clipping to the necessary attenuation factor to avoid it
                      } IndividualChannelStream;
                      /**

libavcodec/aaccoder.c

History View file @ 59216e0

@@ -79,6 +79,9 @@ static const uint8_t * const run_value_bits[2] = {
                          run_value_bits_long, run_value_bits_short
                      };
                     +#define ROUND_STANDARD 0.4054f
                     +#define ROUND_TO_ZERO 0.1054f
+                    +
                      /** Map to convert values from BandCodingPath index to a codebook index **/
                      static const uint8_t aac_cb_out_map[CB_TOT_ALL]  = {0,1,2,3,4,5,6,7,8,9,10,11,13,14,15};
                      /** Inverse map to convert from codebooks to BandCodingPath indices **/
@@ -89,20 +92,20 @@ static const uint8_t aac_cb_in_map[CB_TOT_ALL+1] = {0,1,2,3,4,5,6,7,8,9,10,11,0,
                       * @return absolute value of the quantized coefficient
                       * @see 3GPP TS26.403 5.6.2 "Scalefactor determination"
                       */
                     -static av_always_inline int quant(float coef, const float Q)
                     +static av_always_inline int quant(float coef, const float Q, const float rounding)
+                     {
                          float a = coef * Q;
                     -    return sqrtf(a * sqrtf(a)) + 0.4054;
                     +    return sqrtf(a * sqrtf(a)) + rounding;
+                     }
                      static void quantize_bands(int *out, const float *in, const float *scaled,
                     -                           int size, float Q34, int is_signed, int maxval)
                     +                           int size, float Q34, int is_signed, int maxval, const float rounding)
+                     {
                          int i;
                          double qc;
                          for (i = 0; i < size; i++) {
                              qc = scaled[i] * Q34;
                     -        out[i] = (int)FFMIN(qc + 0.4054, (double)maxval);
                     +        out[i] = (int)FFMIN(qc + rounding, (double)maxval);
                              if (is_signed && in[i] < 0.0f) {
                                  out[i] = -out[i];
+                             }
@@ -134,7 +137,8 @@ static av_always_inline float quantize_and_encode_band_cost_template(
                                                      const float *scaled, int size, int scale_idx,
                                                      int cb, const float lambda, const float uplim,
                                                      int *bits, int BT_ZERO, int BT_UNSIGNED,
                     -                                int BT_PAIR, int BT_ESC, int BT_NOISE, int BT_STEREO)
                     +                                int BT_PAIR, int BT_ESC, int BT_NOISE, int BT_STEREO,
                     +                                const float ROUNDING)
+                     {
                          const int q_idx = POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512;
                          const float Q   = ff_aac_pow2sf_tab [q_idx];
@@ -158,7 +162,7 @@ static av_always_inline float quantize_and_encode_band_cost_template(
                              abs_pow34_v(s->scoefs, in, size);
                              scaled = s->scoefs;
+                         }
                     -    quantize_bands(s->qcoefs, in, scaled, size, Q34, !BT_UNSIGNED, aac_cb_maxval[cb]);
                     +    quantize_bands(s->qcoefs, in, scaled, size, Q34, !BT_UNSIGNED, aac_cb_maxval[cb], ROUNDING);
                          if (BT_UNSIGNED) {
                              off = 0;
                          } else {
@@ -185,7 +189,7 @@ static av_always_inline float quantize_and_encode_band_cost_template(
                                              di = t - CLIPPED_ESCAPE;
                                              curbits += 21;
                                          } else {
                     -                        int c = av_clip_uintp2(quant(t, Q), 13);
                     +                        int c = av_clip_uintp2(quant(t, Q, ROUNDING), 13);
                                              di = t - c*cbrtf(c)*IQ;
                                              curbits += av_log2(c)*2 - 4 + 1;
+                                         }
@@ -215,7 +219,7 @@ static av_always_inline float quantize_and_encode_band_cost_template(
                                  if (BT_ESC) {
                                      for (j = 0; j < 2; j++) {
                                          if (ff_aac_codebook_vectors[cb-1][curidx*2+j] == 64.0f) {
                     -                        int coef = av_clip_uintp2(quant(fabsf(in[i+j]), Q), 13);
                     +                        int coef = av_clip_uintp2(quant(fabsf(in[i+j]), Q, ROUNDING), 13);
                                              int len = av_log2(coef);
                                              put_bits(pb, len - 4 + 1, (1 << (len - 4 + 1)) - 2);
@@ -240,7 +244,7 @@ static float quantize_and_encode_band_cost_NONE(struct AACEncContext *s, PutBitC
                          return 0.0f;
+                     }
                     -#define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO) \
                     +#define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO, ROUNDING) \
                      static float quantize_and_encode_band_cost_ ## NAME(                                         \
                                                      struct AACEncContext *s,                                     \
                                                      PutBitContext *pb, const float *in,                          \
@@ -250,17 +254,19 @@ static float quantize_and_encode_band_cost_ ## NAME(
                          return quantize_and_encode_band_cost_template(                                           \
                                                      s, pb, in, scaled, size, scale_idx,                          \
                                                      BT_ESC ? ESC_BT : cb, lambda, uplim, bits,                   \
                     -                                BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO); \
                     +                                BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO,  \
                     +                                ROUNDING);                                                   \
+                     }
                     -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ZERO,  1, 0, 0, 0, 0, 0)
                     -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SQUAD, 0, 0, 0, 0, 0, 0)
                     -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UQUAD, 0, 1, 0, 0, 0, 0)
                     -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SPAIR, 0, 0, 1, 0, 0, 0)
                     -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UPAIR, 0, 1, 1, 0, 0, 0)
                     -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC,   0, 1, 1, 1, 0, 0)
                     -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NOISE, 0, 0, 0, 0, 1, 0)
                     -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(STEREO,0, 0, 0, 0, 0, 1)
                     +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ZERO,  1, 0, 0, 0, 0, 0, ROUND_STANDARD)
                     +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SQUAD, 0, 0, 0, 0, 0, 0, ROUND_STANDARD)
                     +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UQUAD, 0, 1, 0, 0, 0, 0, ROUND_STANDARD)
                     +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SPAIR, 0, 0, 1, 0, 0, 0, ROUND_STANDARD)
                     +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UPAIR, 0, 1, 1, 0, 0, 0, ROUND_STANDARD)
                     +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC,   0, 1, 1, 1, 0, 0, ROUND_STANDARD)
                     +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC_RTZ, 0, 1, 1, 1, 0, 0, ROUND_TO_ZERO)
                     +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NOISE, 0, 0, 0, 0, 1, 0, ROUND_STANDARD)
                     +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(STEREO,0, 0, 0, 0, 0, 1, ROUND_STANDARD)
                      static float (*const quantize_and_encode_band_cost_arr[])(
                                                      struct AACEncContext *s,
@@ -286,28 +292,52 @@ static float (*const quantize_and_encode_band_cost_arr[])(
                          quantize_and_encode_band_cost_STEREO,
                      };
                     +static float (*const quantize_and_encode_band_cost_rtz_arr[])(
                     +                                struct AACEncContext *s,
                     +                                PutBitContext *pb, const float *in,
                     +                                const float *scaled, int size, int scale_idx,
                     +                                int cb, const float lambda, const float uplim,
                     +                                int *bits) = {
                     +    quantize_and_encode_band_cost_ZERO,
                     +    quantize_and_encode_band_cost_SQUAD,
                     +    quantize_and_encode_band_cost_SQUAD,
                     +    quantize_and_encode_band_cost_UQUAD,
                     +    quantize_and_encode_band_cost_UQUAD,
                     +    quantize_and_encode_band_cost_SPAIR,
                     +    quantize_and_encode_band_cost_SPAIR,
                     +    quantize_and_encode_band_cost_UPAIR,
                     +    quantize_and_encode_band_cost_UPAIR,
                     +    quantize_and_encode_band_cost_UPAIR,
                     +    quantize_and_encode_band_cost_UPAIR,
                     +    quantize_and_encode_band_cost_ESC_RTZ,
                     +    quantize_and_encode_band_cost_NONE,     /* CB 12 doesn't exist */
                     +    quantize_and_encode_band_cost_NOISE,
                     +    quantize_and_encode_band_cost_STEREO,
                     +    quantize_and_encode_band_cost_STEREO,
                     +};
+                    +
                      #define quantize_and_encode_band_cost(                                  \
                                                      s, pb, in, scaled, size, scale_idx, cb, \
                     -                                lambda, uplim, bits)                    \
                     -    quantize_and_encode_band_cost_arr[cb](                              \
                     +                                lambda, uplim, bits, rtz)               \
                     +    ((rtz) ? quantize_and_encode_band_cost_rtz_arr : quantize_and_encode_band_cost_arr)[cb]( \
                                                      s, pb, in, scaled, size, scale_idx, cb, \
                                                      lambda, uplim, bits)
                      static float quantize_band_cost(struct AACEncContext *s, const float *in,
                                                      const float *scaled, int size, int scale_idx,
                                                      int cb, const float lambda, const float uplim,
                     -                                int *bits)
                     +                                int *bits, int rtz)
+                     {
                          return quantize_and_encode_band_cost(s, NULL, in, scaled, size, scale_idx,
                     -                                         cb, lambda, uplim, bits);
                     +                                         cb, lambda, uplim, bits, rtz);
+                     }
                      static void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb,
                                                           const float *in, int size, int scale_idx,
                     -                                     int cb, const float lambda)
                     +                                     int cb, const float lambda, int rtz)
+                     {
                          quantize_and_encode_band_cost(s, pb, in, NULL, size, scale_idx, cb, lambda,
                     -                                  INFINITY, NULL);
                     +                                  INFINITY, NULL, rtz);
+                     }
                      static float find_max_val(int group_len, int swb_size, const float *scaled) {
@@ -397,7 +427,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce
                                          rd += quantize_band_cost(s, sce->coeffs + start + w*128,
                                                                   s->scoefs + start + w*128, size,
                                                                   sce->sf_idx[(win+w)*16+swb], aac_cb_out_map[cb],
                     -                                             lambda / band->threshold, INFINITY, NULL);
                     +                                             lambda / band->threshold, INFINITY, NULL, 0);
+                                     }
                                      cost_stay_here = path[swb][cb].cost + rd;
                                      cost_get_here  = minrd              + rd + run_bits + 4;
@@ -527,9 +557,9 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce,
                                      for (w = 0; w < group_len; w++) {
                                          bits += quantize_band_cost(s, sce->coeffs + start + w*128,
                                                                     s->scoefs + start + w*128, size,
                     -                                               sce->sf_idx[(win+w)*16+swb],
                     +                                               sce->sf_idx[win*16+swb],
                                                                     aac_cb_out_map[cb],
                     -                                               0, INFINITY, NULL);
                     +                                               0, INFINITY, NULL, 0);
+                                     }
                                      cost_stay_here = path[swb][cb].cost + bits;
                                      cost_get_here  = minbits            + bits + run_bits + 4;
@@ -749,7 +779,7 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
                                          for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                                              FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
                                              dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g],
                     -                                                   q + q0, cb, lambda / band->threshold, INFINITY, NULL);
                     +                                                   q + q0, cb, lambda / band->threshold, INFINITY, NULL, 0);
+                                         }
                                          minrd = FFMIN(minrd, dist);
@@ -895,7 +925,8 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx,
                                                                         cb,
 .0f,
                                                                         INFINITY,
                     -                                                   &b);
                     +                                                   &b,
                     +                                                   0);
                                              bits += b;
+                                         }
                                          dists[w*16+g] = dist - bits;
@@ -1061,11 +1092,12 @@ static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s,
                                                                     ESC_BT,
                                                                     lambda,
                                                                     INFINITY,
                     -                                               &b);
                     +                                               &b,
                     +                                               0);
                                          dist -= b;
+                                     }
                                      dist *= 1.0f / 512.0f / lambda;
                     -                quant_max = quant(maxq[w*16+g], ff_aac_pow2sf_tab[POW_SF2_ZERO - scf + SCALE_ONE_POS - SCALE_DIV_512]);
                     +                quant_max = quant(maxq[w*16+g], ff_aac_pow2sf_tab[POW_SF2_ZERO - scf + SCALE_ONE_POS - SCALE_DIV_512], ROUND_STANDARD);
                                      if (quant_max >= 8191) { // too much, return to the previous quantizer
                                          sce->sf_idx[w*16+g] = prev_scf;
                                          break;
@@ -1242,19 +1274,19 @@ static void search_for_is(AACEncContext *s, AVCodecContext *avctx, ChannelElemen
                                                                      sce0->ics.swb_sizes[g],
                                                                      sce0->sf_idx[(w+w2)*16+g],
                                                                      sce0->band_type[(w+w2)*16+g],
                     -                                                lambda / band0->threshold, INFINITY, NULL);
                     +                                                lambda / band0->threshold, INFINITY, NULL, 0);
                                          dist1 += quantize_band_cost(s, sce1->coeffs + start + (w+w2)*128,
                                                                      R34,
                                                                      sce1->ics.swb_sizes[g],
                                                                      sce1->sf_idx[(w+w2)*16+g],
                                                                      sce1->band_type[(w+w2)*16+g],
                     -                                                lambda / band1->threshold, INFINITY, NULL);
                     +                                                lambda / band1->threshold, INFINITY, NULL, 0);
                                          dist2 += quantize_band_cost(s, IS,
                                                                      I34,
                                                                      sce0->ics.swb_sizes[g],
                                                                      is_sf_idx,
                                                                      is_band_type,
                     -                                                lambda / minthr, INFINITY, NULL);
                     +                                                lambda / minthr, INFINITY, NULL, 0);
                                          for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
                                              dist_spec_err += (L34[i] - I34[i])*(L34[i] - I34[i]);
                                              dist_spec_err += (R34[i] - I34[i]*e01_34)*(R34[i] - I34[i]*e01_34);
@@ -1315,25 +1347,25 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe,
                                                                      sce0->ics.swb_sizes[g],
                                                                      sce0->sf_idx[(w+w2)*16+g],
                                                                      sce0->band_type[(w+w2)*16+g],
                     -                                                lambda / band0->threshold, INFINITY, NULL);
                     +                                                lambda / band0->threshold, INFINITY, NULL, 0);
                                          dist1 += quantize_band_cost(s, sce1->coeffs + start + (w+w2)*128,
                                                                      R34,
                                                                      sce1->ics.swb_sizes[g],
                                                                      sce1->sf_idx[(w+w2)*16+g],
                                                                      sce1->band_type[(w+w2)*16+g],
                     -                                                lambda / band1->threshold, INFINITY, NULL);
                     +                                                lambda / band1->threshold, INFINITY, NULL, 0);
                                          dist2 += quantize_band_cost(s, M,
                                                                      M34,
                                                                      sce0->ics.swb_sizes[g],
                                                                      sce0->sf_idx[(w+w2)*16+g],
                                                                      sce0->band_type[(w+w2)*16+g],
                     -                                                lambda / maxthr, INFINITY, NULL);
                     +                                                lambda / maxthr, INFINITY, NULL, 0);
                                          dist2 += quantize_band_cost(s, S,
                                                                      S34,
                                                                      sce1->ics.swb_sizes[g],
                                                                      sce1->sf_idx[(w+w2)*16+g],
                                                                      sce1->band_type[(w+w2)*16+g],
                     -                                                lambda / minthr, INFINITY, NULL);
                     +                                                lambda / minthr, INFINITY, NULL, 0);
+                                     }
                                      cpe->ms_mask[w*16+g] = dist2 < dist1;
+                                 }

libavcodec/aacenc.c

History View file @ 59216e0

@@ -472,13 +472,33 @@ static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
                                                                         sce->ics.swb_sizes[i],
                                                                         sce->sf_idx[w*16 + i],
                                                                         sce->band_type[w*16 + i],
                     -                                                   s->lambda);
                     +                                                   s->lambda, sce->ics.window_clipping[w]);
                                  start += sce->ics.swb_sizes[i];
+                             }
+                         }
+                     }
                      /**
                     + * Downscale spectral coefficients for near-clipping windows to avoid artifacts
                     + */
                     +static void avoid_clipping(AACEncContext *s, SingleChannelElement *sce)
                     +{
                     +    int start, i, j, w;
+                    +
                     +    if (sce->ics.clip_avoidance_factor < 1.0f) {
                     +        for (w = 0; w < sce->ics.num_windows; w++) {
                     +            start = 0;
                     +            for (i = 0; i < sce->ics.max_sfb; i++) {
                     +                float *swb_coeffs = sce->coeffs + start + w*128;
                     +                for (j = 0; j < sce->ics.swb_sizes[i]; j++)
                     +                    swb_coeffs[j] *= sce->ics.clip_avoidance_factor;
                     +                start += sce->ics.swb_sizes[i];
                     +            }
                     +        }
                     +    }
                     +}
+                    +
                     +/**
                       * Encode one channel of audio data.
                       */
                      static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
@@ -578,6 +598,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                              for (ch = 0; ch < chans; ch++) {
                                  IndividualChannelStream *ics = &cpe->ch[ch].ics;
                                  int cur_channel = start_ch + ch;
                     +            float clip_avoidance_factor;
                                  overlap  = &samples[cur_channel][0];
                                  samples2 = overlap + 1024;
                                  la       = samples2 + (448+64);
@@ -605,14 +626,29 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                                  ics->num_windows        = wi[ch].num_windows;
                                  ics->swb_sizes          = s->psy.bands    [ics->num_windows == 8];
                                  ics->num_swb            = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
                     +            clip_avoidance_factor = 0.0f;
                                  for (w = 0; w < ics->num_windows; w++)
                                      ics->group_len[w] = wi[ch].grouping[w];
                     +            for (w = 0; w < ics->num_windows; w++) {
                     +                if (wi[ch].clipping[w] > CLIP_AVOIDANCE_FACTOR) {
                     +                    ics->window_clipping[w] = 1;
                     +                    clip_avoidance_factor = FFMAX(clip_avoidance_factor, wi[ch].clipping[w]);
                     +                } else {
                     +                    ics->window_clipping[w] = 0;
                     +                }
                     +            }
                     +            if (clip_avoidance_factor > CLIP_AVOIDANCE_FACTOR) {
                     +                ics->clip_avoidance_factor = CLIP_AVOIDANCE_FACTOR / clip_avoidance_factor;
                     +            } else {
                     +                ics->clip_avoidance_factor = 1.0f;
                     +            }
                                  apply_window_and_mdct(s, &cpe->ch[ch], overlap);
                                  if (isnan(cpe->ch->coeffs[0])) {
                                      av_log(avctx, AV_LOG_ERROR, "Input contains NaN\n");
                                      return AVERROR(EINVAL);
+                                 }
                     +            avoid_clipping(s, &cpe->ch[ch]);
+                             }
                              start_ch += chans;
+                         }

libavcodec/aacenc.h

History View file @ 59216e0

@@ -54,7 +54,7 @@ typedef struct AACCoefficientsEncoder {
                          void (*encode_window_bands_info)(struct AACEncContext *s, SingleChannelElement *sce,
                                                           int win, int group_len, const float lambda);
                          void (*quantize_and_encode_band)(struct AACEncContext *s, PutBitContext *pb, const float *in, int size,
                     -                                     int scale_idx, int cb, const float lambda);
                     +                                     int scale_idx, int cb, const float lambda, int rtz);
                          void (*set_special_band_scalefactors)(struct AACEncContext *s, SingleChannelElement *sce);
                          void (*search_for_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce, const float lambda);
                          void (*search_for_ms)(struct AACEncContext *s, ChannelElement *cpe, const float lambda);

libavcodec/aacpsy.c

History View file @ 59216e0

@@ -837,6 +837,7 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio,
                          int grouping     = 0;
                          int uselongblock = 1;
                          int attacks[AAC_NUM_BLOCKS_SHORT + 1] = { 0 };
                     +    float clippings[AAC_NUM_BLOCKS_SHORT];
                          int i;
                          FFPsyWindowInfo wi = { { 0 } };
@@ -926,14 +927,35 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio,
                          lame_apply_block_type(pch, &wi, uselongblock);
                     +    /* Calculate input sample maximums and evaluate clipping risk */
                     +    if (audio) {
                     +        for (i = 0; i < AAC_NUM_BLOCKS_SHORT; i++) {
                     +            const float *wbuf = audio + i * AAC_BLOCK_SIZE_SHORT;
                     +            float max = 0;
                     +            int j;
                     +            for (j = 0; j < AAC_BLOCK_SIZE_SHORT; j++)
                     +                max = FFMAX(max, fabsf(wbuf[j]));
                     +            clippings[i] = max;
                     +        }
                     +    } else {
                     +        for (i = 0; i < 8; i++)
                     +            clippings[i] = 0;
                     +    }
+                    +
                          wi.window_type[1] = prev_type;
                          if (wi.window_type[0] != EIGHT_SHORT_SEQUENCE) {
                     +        float clipping = 0.0f;
+                    +
                              wi.num_windows  = 1;
                              wi.grouping[0]  = 1;
                              if (wi.window_type[0] == LONG_START_SEQUENCE)
                                  wi.window_shape = 0;
                              else
                                  wi.window_shape = 1;
+                    +
                     +        for (i = 0; i < 8; i++)
                     +            clipping = FFMAX(clipping, clippings[i]);
                     +        wi.clipping[0] = clipping;
                          } else {
                              int lastgrp = 0;
@@ -944,6 +966,14 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio,
                                      lastgrp = i;
                                  wi.grouping[lastgrp]++;
+                             }
+                    +
                     +        for (i = 0; i < 8; i += wi.grouping[i]) {
                     +            int w;
                     +            float clipping = 0.0f;
                     +            for (w = 0; w < wi.grouping[i] && !clipping; w++)
                     +                clipping = FFMAX(clipping, clippings[i+w]);
                     +            wi.clipping[i] = clipping;
                     +        }
+                         }
                          /* Determine grouping, based on the location of the first attack, and save for

libavcodec/psymodel.h

History View file @ 59216e0

@@ -66,6 +66,7 @@ typedef struct FFPsyWindowInfo {
                          int window_shape;                 ///< window shape (sine/KBD/whatever)
                          int num_windows;                  ///< number of windows in a frame
                          int grouping[8];                  ///< window grouping (for e.g. AAC)
                     +    float clipping[8];                ///< maximum absolute normalized intensity in the given window for clip avoidance
                          int *window_sizes;                ///< sequence of window sizes inside one frame (for eg. WMA)
                      } FFPsyWindowInfo;

tests/fate/aac.mak

History View file @ 59216e0

@@ -146,7 +146,7 @@ fate-aac-aref-encode: CMD = enc_dec_pcm adts wav s16le $(REF) -strict -2 -c:a aa
                      fate-aac-aref-encode: CMP = stddev
                      fate-aac-aref-encode: REF = ./tests/data/asynth-44100-2.wav
                      fate-aac-aref-encode: CMP_SHIFT = -4096
                     -fate-aac-aref-encode: CMP_TARGET = 434
                     +fate-aac-aref-encode: CMP_TARGET = 594
                      fate-aac-aref-encode: SIZE_TOLERANCE = 2464
                      fate-aac-aref-encode: FUZZ = 5
@@ -155,7 +155,7 @@ fate-aac-ln-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-ref
                      fate-aac-ln-encode: CMP = stddev
                      fate-aac-ln-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
                      fate-aac-ln-encode: CMP_SHIFT = -4096
                     -fate-aac-ln-encode: CMP_TARGET = 65
                     +fate-aac-ln-encode: CMP_TARGET = 68
                      fate-aac-ln-encode: SIZE_TOLERANCE = 3560
                      FATE_AAC_LATM += fate-aac-latm_000000001180bc60