GitList

Browse code

psymodel: Add channels and channel groups to the psymodel.

Nathan Caldwell authored on 2011/06/15 17:50:25
Showing 5 changed files

libavcodec/aaccoder.c index e752b63..b64bf9f 100644
libavcodec/aacenc.c index 8c7ed87..1df16ad 100644
libavcodec/aacpsy.c index cf8f7eb..79d7084 100644
libavcodec/psymodel.c index fe9363c..740d859 100644
libavcodec/psymodel.h index 02dc9c9..241f8ec 100644

@@ -345,7 +345,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce
                                      float cost_stay_here, cost_get_here;
                                      float rd = 0.0f;
                                      for (w = 0; w < group_len; w++) {
                     -                    FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(win+w)*16+swb];
                     +                    FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(win+w)*16+swb];
                                          rd += quantize_band_cost(s, sce->coeffs + start + w*128,
                                                                   s->scoefs + start + w*128, size,
                                                                   sce->sf_idx[(win+w)*16+swb], cb,
@@ -625,7 +625,7 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
                                  qmin = INT_MAX;
                                  qmax = 0.0f;
                                  for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                     -                FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
                     +                FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
                                      if (band->energy <= band->threshold || band->threshold == 0.0f) {
                                          sce->zeroes[(w+w2)*16+g] = 1;
                                          continue;
@@ -654,7 +654,7 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
                                          float dist = 0;
                                          int cb = find_min_book(maxval, sce->sf_idx[w*16+g]);
                                          for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                     -                        FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
                     +                        FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
                                              dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g],
                                                                         q + q0, cb, lambda / band->threshold, INFINITY, NULL);
                                          }
@@ -727,7 +727,7 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx,
                                  int nz = 0;
                                  float uplim = 0.0f;
                                  for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                     -                FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
                     +                FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
                                      uplim += band->threshold;
                                      if (band->energy <= band->threshold || band->threshold == 0.0f) {
                                          sce->zeroes[(w+w2)*16+g] = 1;
@@ -1027,7 +1027,7 @@ static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s,
                          for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
                              for (g = 0; g < sce->ics.num_swb; g++) {
                                  for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                     -                FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
                     +                FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
                                      if (band->energy <= band->threshold) {
                                          sce->sf_idx[(w+w2)*16+g] = 218;
                                          sce->zeroes[(w+w2)*16+g] = 1;
@@ -1065,8 +1065,8 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe,
                                  if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
                                      float dist1 = 0.0f, dist2 = 0.0f;
                                      for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
                     -                    FFPsyBand *band0 = &s->psy.psy_bands[(s->cur_channel+0)*PSY_MAX_BANDS+(w+w2)*16+g];
                     -                    FFPsyBand *band1 = &s->psy.psy_bands[(s->cur_channel+1)*PSY_MAX_BANDS+(w+w2)*16+g];
                     +                    FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
                     +                    FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
                                          float minthr = FFMIN(band0->threshold, band1->threshold);
                                          float maxthr = FFMAX(band0->threshold, band1->threshold);
                                          for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {

libavcodec/aacenc.c

History View file @ 0bc01cc

@@ -210,7 +210,7 @@ static av_cold int aac_encode_init(AVCodecContext *avctx)
                          sizes[1]   = swb_size_128[i];
                          lengths[0] = ff_aac_num_swb_1024[i];
                          lengths[1] = ff_aac_num_swb_128[i];
                     -    ff_psy_init(&s->psy, avctx, 2, sizes, lengths);
                     +    ff_psy_init(&s->psy, avctx, 2, sizes, lengths, s->chan_map[0], &s->chan_map[1]);
                          s->psypp = ff_psy_preprocess_init(avctx);
                          s->coder = &ff_aac_coders[2];
@@ -570,8 +570,8 @@ static int aac_encode_frame(AVCodecContext *avctx,
                                  put_bits(&s->pb, 3, tag);
                                  put_bits(&s->pb, 4, chan_el_counter[tag]++);
                                  for (ch = 0; ch < chans; ch++) {
                     -                s->cur_channel = start_ch + ch;
                     -                s->psy.model->analyze(&s->psy, s->cur_channel, cpe->ch[ch].coeffs, &wi[ch]);
                     +                s->cur_channel = start_ch * 2 + ch;
                     +                s->psy.model->analyze(&s->psy, start_ch + ch, cpe->ch[ch].coeffs, &wi[ch]);
                                      s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
                                  }
                                  cpe->common_window = 0;
@@ -587,7 +587,7 @@ static int aac_encode_frame(AVCodecContext *avctx,
                                          }
                                      }
                                  }
                     -            s->cur_channel = start_ch;
                     +            s->cur_channel = start_ch * 2;
                                  if (s->options.stereo_mode && cpe->common_window) {
                                      if (s->options.stereo_mode > 0) {
                                          IndividualChannelStream *ics = &cpe->ch[0].ics;

libavcodec/aacpsy.c

History View file @ 0bc01cc

@@ -627,7 +627,7 @@ static void psy_3gpp_analyze(FFPsyContext *ctx, int channel,
                          }
                          /* 5.6.1.3.2 "Calculation of the desired perceptual entropy" */
                     -    ctx->pe[channel] = pe;
                     +    ctx->ch[channel].entropy = pe;
                          desired_bits = calc_bit_demand(pctx, pe, ctx->bitres.bits, ctx->bitres.size, wi->num_windows == 8);
                          desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits);
                          /* NOTE: PE correction is kept simple. During initial testing it had very
@@ -731,7 +731,7 @@ static void psy_3gpp_analyze(FFPsyContext *ctx, int channel,
                          for (w = 0; w < wi->num_windows*16; w += 16) {
                              for (g = 0; g < num_bands; g++) {
                                  AacPsyBand *band     = &pch->band[w+g];
                     -            FFPsyBand  *psy_band = &ctx->psy_bands[channel*PSY_MAX_BANDS+w+g];
                     +            FFPsyBand  *psy_band = &ctx->ch[channel].psy_bands[w+g];
                                  psy_band->threshold = band->thr;
                                  psy_band->energy    = band->energy;
@@ -921,5 +921,6 @@ const FFPsyModel ff_aac_psy_model =
                          .init    = psy_3gpp_init,
                          .window  = psy_lame_window,
                          .analyze = psy_3gpp_analyze,
                     +    .analyze_group = NULL,
                          .end     = psy_3gpp_end,
                      };

libavcodec/psymodel.c

History View file @ 0bc01cc

@@ -25,16 +25,31 @@
                      extern const FFPsyModel ff_aac_psy_model;
                     -av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx,
                     -                        int num_lens,
                     -                        const uint8_t **bands, const int* num_bands)
                     +av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens,
                     +                        const uint8_t **bands, const int* num_bands,
                     +                        int num_groups, const uint8_t *group_map)
                      {
                     +    int i, j, k = 0;
                     +
                          ctx->avctx = avctx;
                     -    ctx->psy_bands = av_mallocz(sizeof(FFPsyBand) * PSY_MAX_BANDS * avctx->channels);
                     +    ctx->ch        = av_mallocz(sizeof(ctx->ch[0]) * avctx->channels * 2);
                     +    ctx->group     = av_mallocz(sizeof(ctx->group[0]) * num_groups);
                          ctx->bands     = av_malloc (sizeof(ctx->bands[0])     * num_lens);
                          ctx->num_bands = av_malloc (sizeof(ctx->num_bands[0]) * num_lens);
                          memcpy(ctx->bands,     bands,     sizeof(ctx->bands[0])     *  num_lens);
                          memcpy(ctx->num_bands, num_bands, sizeof(ctx->num_bands[0]) *  num_lens);
                     +
                     +    /* assign channels to groups (with virtual channels for coupling) */
                     +    for (i = 0; i < num_groups; i++) {
                     +        /* NOTE: Add 1 to handle the AAC chan_config without modification.
                     +         *       This has the side effect of allowing an array of 0s to map
                     +         *       to one channel per group.
                     +         */
                     +        ctx->group[i].num_ch = group_map[i] + 1;
                     +        for (j = 0; j < ctx->group[i].num_ch * 2; j++)
                     +            ctx->group[i].ch[j]  = &ctx->ch[k++];
                     +    }
                     +
                          switch (ctx->avctx->codec_id) {
                          case CODEC_ID_AAC:
                              ctx->model = &ff_aac_psy_model;
@@ -45,13 +60,24 @@ av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx,
                          return 0;
                      }
                     +FFPsyChannelGroup *ff_psy_find_group(FFPsyContext *ctx, int channel)
                     +{
                     +    int i = 0, ch = 0;
                     +
                     +    while (ch <= channel)
                     +        ch += ctx->group[i++].num_ch;
                     +
                     +    return &ctx->group[i-1];
                     +}
                     +
                      av_cold void ff_psy_end(FFPsyContext *ctx)
                      {
                          if (ctx->model->end)
                              ctx->model->end(ctx);
                          av_freep(&ctx->bands);
                          av_freep(&ctx->num_bands);
                     -    av_freep(&ctx->psy_bands);
                     +    av_freep(&ctx->group);
                     +    av_freep(&ctx->ch);
                      }
                      typedef struct FFPsyPreprocessContext{

libavcodec/psymodel.h

History View file @ 0bc01cc

@@ -41,6 +41,23 @@ typedef struct FFPsyBand {
                      } FFPsyBand;
                      /**
                     + * single channel psychoacoustic information
                     + */
                     +typedef struct FFPsyChannel {
                     +    FFPsyBand psy_bands[PSY_MAX_BANDS]; ///< channel bands information
                     +    float     entropy;                  ///< total PE for this channel
                     +} FFPsyChannel;
                     +
                     +/**
                     + * psychoacoustic information for an arbitrary group of channels
                     + */
                     +typedef struct FFPsyChannelGroup {
                     +    FFPsyChannel *ch[PSY_MAX_CHANS];  ///< pointers to the individual channels in the group
                     +    uint8_t num_ch;                   ///< number of channels in this group
                     +    uint8_t coupling[PSY_MAX_BANDS];  ///< allow coupling for this band in the group
                     +} FFPsyChannelGroup;
                     +
                     +/**
                       * windowing related information
                       */
                      typedef struct FFPsyWindowInfo {
@@ -58,14 +75,14 @@ typedef struct FFPsyContext {
                          AVCodecContext *avctx;            ///< encoder context
                          const struct FFPsyModel *model;   ///< encoder-specific model functions
                     -    FFPsyBand *psy_bands;             ///< frame bands information
                     +    FFPsyChannel      *ch;            ///< single channel information
                     +    FFPsyChannelGroup *group;         ///< channel group information
                     +    int num_groups;                   ///< number of channel groups
                          uint8_t **bands;                  ///< scalefactor band sizes for possible frame sizes
                          int     *num_bands;               ///< number of scalefactor bands for possible frame sizes
                          int num_lens;                     ///< number of scalefactor band sets
                     -    float pe[PSY_MAX_CHANS];          ///< total PE for each channel in the frame
                     -
                          struct {
                              int size;                     ///< size of the bitresevoir in bits
                              int bits;                     ///< number of bits used in the bitresevoir
@@ -95,7 +112,7 @@ typedef struct FFPsyModel {
                          FFPsyWindowInfo (*window)(FFPsyContext *ctx, const int16_t *audio, const int16_t *la, int channel, int prev_type);
                          /**
                     -     * Perform psychoacoustic analysis and set band info (threshold, energy).
                     +     * Perform psychoacoustic analysis and set band info (threshold, energy) for a single channel.
                           *
                           * @param ctx     model context
                           * @param channel audio channel number
@@ -104,6 +121,16 @@ typedef struct FFPsyModel {
                           */
                          void (*analyze)(FFPsyContext *ctx, int channel, const float *coeffs, const FFPsyWindowInfo *wi);
                     +    /**
                     +     * Perform psychoacoustic analysis and set band info (threshold, energy) for a group of channels.
                     +     *
                     +     * @param ctx      model context
                     +     * @param channel  channel number of the first channel in the group to perform analysis on
                     +     * @param coeffs   array of pointers to the transformed coefficients
                     +     * @param wi       window information for the channels in the group
                     +     */
                     +    void (*analyze_group)(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi);
                     +
                          void (*end)    (FFPsyContext *apc);
                      } FFPsyModel;
@@ -115,12 +142,24 @@ typedef struct FFPsyModel {
                       * @param num_lens   number of possible frame lengths
                       * @param bands      scalefactor band lengths for all frame lengths
                       * @param num_bands  number of scalefactor bands for all frame lengths
                     + * @param num_groups number of channel groups
                     + * @param group_map  array with # of channels in group - 1, for each group
                       *
                       * @return zero if successful, a negative value if not
                       */
                     -av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx,
                     -                        int num_lens,
                     -                        const uint8_t **bands, const int* num_bands);
                     +av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens,
                     +                        const uint8_t **bands, const int* num_bands,
                     +                        int num_groups, const uint8_t *group_map);
                     +
                     +/**
                     + * Determine what group a channel belongs to.
                     + *
                     + * @param ctx     psymodel context
                     + * @param channel channel to locate the group for
                     + *
                     + * @return pointer to the FFPsyChannelGroup this channel belongs to
                     + */
                     +FFPsyChannelGroup *ff_psy_find_group(FFPsyContext *ctx, int channel);
                      /**
                       * Cleanup model context at the end.