Browse code

psymodel: Add channels and channel groups to the psymodel.

Nathan Caldwell authored on 2011/06/15 17:50:25
Showing 5 changed files
... ...
@@ -345,7 +345,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce
345 345
                 float cost_stay_here, cost_get_here;
346 346
                 float rd = 0.0f;
347 347
                 for (w = 0; w < group_len; w++) {
348
-                    FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(win+w)*16+swb];
348
+                    FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(win+w)*16+swb];
349 349
                     rd += quantize_band_cost(s, sce->coeffs + start + w*128,
350 350
                                              s->scoefs + start + w*128, size,
351 351
                                              sce->sf_idx[(win+w)*16+swb], cb,
... ...
@@ -625,7 +625,7 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
625 625
             qmin = INT_MAX;
626 626
             qmax = 0.0f;
627 627
             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
628
-                FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
628
+                FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
629 629
                 if (band->energy <= band->threshold || band->threshold == 0.0f) {
630 630
                     sce->zeroes[(w+w2)*16+g] = 1;
631 631
                     continue;
... ...
@@ -654,7 +654,7 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
654 654
                     float dist = 0;
655 655
                     int cb = find_min_book(maxval, sce->sf_idx[w*16+g]);
656 656
                     for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
657
-                        FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
657
+                        FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
658 658
                         dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g],
659 659
                                                    q + q0, cb, lambda / band->threshold, INFINITY, NULL);
660 660
                     }
... ...
@@ -727,7 +727,7 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx,
727 727
             int nz = 0;
728 728
             float uplim = 0.0f;
729 729
             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
730
-                FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
730
+                FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
731 731
                 uplim += band->threshold;
732 732
                 if (band->energy <= band->threshold || band->threshold == 0.0f) {
733 733
                     sce->zeroes[(w+w2)*16+g] = 1;
... ...
@@ -1027,7 +1027,7 @@ static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s,
1027 1027
     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
1028 1028
         for (g = 0; g < sce->ics.num_swb; g++) {
1029 1029
             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
1030
-                FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
1030
+                FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
1031 1031
                 if (band->energy <= band->threshold) {
1032 1032
                     sce->sf_idx[(w+w2)*16+g] = 218;
1033 1033
                     sce->zeroes[(w+w2)*16+g] = 1;
... ...
@@ -1065,8 +1065,8 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe,
1065 1065
             if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
1066 1066
                 float dist1 = 0.0f, dist2 = 0.0f;
1067 1067
                 for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
1068
-                    FFPsyBand *band0 = &s->psy.psy_bands[(s->cur_channel+0)*PSY_MAX_BANDS+(w+w2)*16+g];
1069
-                    FFPsyBand *band1 = &s->psy.psy_bands[(s->cur_channel+1)*PSY_MAX_BANDS+(w+w2)*16+g];
1068
+                    FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
1069
+                    FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
1070 1070
                     float minthr = FFMIN(band0->threshold, band1->threshold);
1071 1071
                     float maxthr = FFMAX(band0->threshold, band1->threshold);
1072 1072
                     for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
... ...
@@ -210,7 +210,7 @@ static av_cold int aac_encode_init(AVCodecContext *avctx)
210 210
     sizes[1]   = swb_size_128[i];
211 211
     lengths[0] = ff_aac_num_swb_1024[i];
212 212
     lengths[1] = ff_aac_num_swb_128[i];
213
-    ff_psy_init(&s->psy, avctx, 2, sizes, lengths);
213
+    ff_psy_init(&s->psy, avctx, 2, sizes, lengths, s->chan_map[0], &s->chan_map[1]);
214 214
     s->psypp = ff_psy_preprocess_init(avctx);
215 215
     s->coder = &ff_aac_coders[2];
216 216
 
... ...
@@ -570,8 +570,8 @@ static int aac_encode_frame(AVCodecContext *avctx,
570 570
             put_bits(&s->pb, 3, tag);
571 571
             put_bits(&s->pb, 4, chan_el_counter[tag]++);
572 572
             for (ch = 0; ch < chans; ch++) {
573
-                s->cur_channel = start_ch + ch;
574
-                s->psy.model->analyze(&s->psy, s->cur_channel, cpe->ch[ch].coeffs, &wi[ch]);
573
+                s->cur_channel = start_ch * 2 + ch;
574
+                s->psy.model->analyze(&s->psy, start_ch + ch, cpe->ch[ch].coeffs, &wi[ch]);
575 575
                 s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
576 576
             }
577 577
             cpe->common_window = 0;
... ...
@@ -587,7 +587,7 @@ static int aac_encode_frame(AVCodecContext *avctx,
587 587
                     }
588 588
                 }
589 589
             }
590
-            s->cur_channel = start_ch;
590
+            s->cur_channel = start_ch * 2;
591 591
             if (s->options.stereo_mode && cpe->common_window) {
592 592
                 if (s->options.stereo_mode > 0) {
593 593
                     IndividualChannelStream *ics = &cpe->ch[0].ics;
... ...
@@ -627,7 +627,7 @@ static void psy_3gpp_analyze(FFPsyContext *ctx, int channel,
627 627
     }
628 628
 
629 629
     /* 5.6.1.3.2 "Calculation of the desired perceptual entropy" */
630
-    ctx->pe[channel] = pe;
630
+    ctx->ch[channel].entropy = pe;
631 631
     desired_bits = calc_bit_demand(pctx, pe, ctx->bitres.bits, ctx->bitres.size, wi->num_windows == 8);
632 632
     desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits);
633 633
     /* NOTE: PE correction is kept simple. During initial testing it had very
... ...
@@ -731,7 +731,7 @@ static void psy_3gpp_analyze(FFPsyContext *ctx, int channel,
731 731
     for (w = 0; w < wi->num_windows*16; w += 16) {
732 732
         for (g = 0; g < num_bands; g++) {
733 733
             AacPsyBand *band     = &pch->band[w+g];
734
-            FFPsyBand  *psy_band = &ctx->psy_bands[channel*PSY_MAX_BANDS+w+g];
734
+            FFPsyBand  *psy_band = &ctx->ch[channel].psy_bands[w+g];
735 735
 
736 736
             psy_band->threshold = band->thr;
737 737
             psy_band->energy    = band->energy;
... ...
@@ -921,5 +921,6 @@ const FFPsyModel ff_aac_psy_model =
921 921
     .init    = psy_3gpp_init,
922 922
     .window  = psy_lame_window,
923 923
     .analyze = psy_3gpp_analyze,
924
+    .analyze_group = NULL,
924 925
     .end     = psy_3gpp_end,
925 926
 };
... ...
@@ -25,16 +25,31 @@
25 25
 
26 26
 extern const FFPsyModel ff_aac_psy_model;
27 27
 
28
-av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx,
29
-                        int num_lens,
30
-                        const uint8_t **bands, const int* num_bands)
28
+av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens,
29
+                        const uint8_t **bands, const int* num_bands,
30
+                        int num_groups, const uint8_t *group_map)
31 31
 {
32
+    int i, j, k = 0;
33
+
32 34
     ctx->avctx = avctx;
33
-    ctx->psy_bands = av_mallocz(sizeof(FFPsyBand) * PSY_MAX_BANDS * avctx->channels);
35
+    ctx->ch        = av_mallocz(sizeof(ctx->ch[0]) * avctx->channels * 2);
36
+    ctx->group     = av_mallocz(sizeof(ctx->group[0]) * num_groups);
34 37
     ctx->bands     = av_malloc (sizeof(ctx->bands[0])     * num_lens);
35 38
     ctx->num_bands = av_malloc (sizeof(ctx->num_bands[0]) * num_lens);
36 39
     memcpy(ctx->bands,     bands,     sizeof(ctx->bands[0])     *  num_lens);
37 40
     memcpy(ctx->num_bands, num_bands, sizeof(ctx->num_bands[0]) *  num_lens);
41
+
42
+    /* assign channels to groups (with virtual channels for coupling) */
43
+    for (i = 0; i < num_groups; i++) {
44
+        /* NOTE: Add 1 to handle the AAC chan_config without modification.
45
+         *       This has the side effect of allowing an array of 0s to map
46
+         *       to one channel per group.
47
+         */
48
+        ctx->group[i].num_ch = group_map[i] + 1;
49
+        for (j = 0; j < ctx->group[i].num_ch * 2; j++)
50
+            ctx->group[i].ch[j]  = &ctx->ch[k++];
51
+    }
52
+
38 53
     switch (ctx->avctx->codec_id) {
39 54
     case CODEC_ID_AAC:
40 55
         ctx->model = &ff_aac_psy_model;
... ...
@@ -45,13 +60,24 @@ av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx,
45 45
     return 0;
46 46
 }
47 47
 
48
+FFPsyChannelGroup *ff_psy_find_group(FFPsyContext *ctx, int channel)
49
+{
50
+    int i = 0, ch = 0;
51
+
52
+    while (ch <= channel)
53
+        ch += ctx->group[i++].num_ch;
54
+
55
+    return &ctx->group[i-1];
56
+}
57
+
48 58
 av_cold void ff_psy_end(FFPsyContext *ctx)
49 59
 {
50 60
     if (ctx->model->end)
51 61
         ctx->model->end(ctx);
52 62
     av_freep(&ctx->bands);
53 63
     av_freep(&ctx->num_bands);
54
-    av_freep(&ctx->psy_bands);
64
+    av_freep(&ctx->group);
65
+    av_freep(&ctx->ch);
55 66
 }
56 67
 
57 68
 typedef struct FFPsyPreprocessContext{
... ...
@@ -41,6 +41,23 @@ typedef struct FFPsyBand {
41 41
 } FFPsyBand;
42 42
 
43 43
 /**
44
+ * single channel psychoacoustic information
45
+ */
46
+typedef struct FFPsyChannel {
47
+    FFPsyBand psy_bands[PSY_MAX_BANDS]; ///< channel bands information
48
+    float     entropy;                  ///< total PE for this channel
49
+} FFPsyChannel;
50
+
51
+/**
52
+ * psychoacoustic information for an arbitrary group of channels
53
+ */
54
+typedef struct FFPsyChannelGroup {
55
+    FFPsyChannel *ch[PSY_MAX_CHANS];  ///< pointers to the individual channels in the group
56
+    uint8_t num_ch;                   ///< number of channels in this group
57
+    uint8_t coupling[PSY_MAX_BANDS];  ///< allow coupling for this band in the group
58
+} FFPsyChannelGroup;
59
+
60
+/**
44 61
  * windowing related information
45 62
  */
46 63
 typedef struct FFPsyWindowInfo {
... ...
@@ -58,14 +75,14 @@ typedef struct FFPsyContext {
58 58
     AVCodecContext *avctx;            ///< encoder context
59 59
     const struct FFPsyModel *model;   ///< encoder-specific model functions
60 60
 
61
-    FFPsyBand *psy_bands;             ///< frame bands information
61
+    FFPsyChannel      *ch;            ///< single channel information
62
+    FFPsyChannelGroup *group;         ///< channel group information
63
+    int num_groups;                   ///< number of channel groups
62 64
 
63 65
     uint8_t **bands;                  ///< scalefactor band sizes for possible frame sizes
64 66
     int     *num_bands;               ///< number of scalefactor bands for possible frame sizes
65 67
     int num_lens;                     ///< number of scalefactor band sets
66 68
 
67
-    float pe[PSY_MAX_CHANS];          ///< total PE for each channel in the frame
68
-
69 69
     struct {
70 70
         int size;                     ///< size of the bit reservoir in bits
71 71
         int bits;                     ///< number of bits used in the bit reservoir
... ...
@@ -95,7 +112,7 @@ typedef struct FFPsyModel {
95 95
     FFPsyWindowInfo (*window)(FFPsyContext *ctx, const int16_t *audio, const int16_t *la, int channel, int prev_type);
96 96
 
97 97
     /**
98
-     * Perform psychoacoustic analysis and set band info (threshold, energy).
98
+     * Perform psychoacoustic analysis and set band info (threshold, energy) for a single channel.
99 99
      *
100 100
      * @param ctx     model context
101 101
      * @param channel audio channel number
... ...
@@ -104,6 +121,16 @@ typedef struct FFPsyModel {
104 104
      */
105 105
     void (*analyze)(FFPsyContext *ctx, int channel, const float *coeffs, const FFPsyWindowInfo *wi);
106 106
 
107
+    /**
108
+     * Perform psychoacoustic analysis and set band info (threshold, energy) for a group of channels.
109
+     *
110
+     * @param ctx      model context
111
+     * @param channel  channel number of the first channel in the group to perform analysis on
112
+     * @param coeffs   array of pointers to the transformed coefficients
113
+     * @param wi       window information for the channels in the group
114
+     */
115
+    void (*analyze_group)(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi);
116
+
107 117
     void (*end)    (FFPsyContext *apc);
108 118
 } FFPsyModel;
109 119
 
... ...
@@ -115,12 +142,24 @@ typedef struct FFPsyModel {
115 115
  * @param num_lens   number of possible frame lengths
116 116
  * @param bands      scalefactor band lengths for all frame lengths
117 117
  * @param num_bands  number of scalefactor bands for all frame lengths
118
+ * @param num_groups number of channel groups
119
+ * @param group_map  array holding, for each group, its number of channels minus 1
118 120
  *
119 121
  * @return zero if successful, a negative value if not
120 122
  */
121
-av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx,
122
-                        int num_lens,
123
-                        const uint8_t **bands, const int* num_bands);
123
+av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens,
124
+                        const uint8_t **bands, const int* num_bands,
125
+                        int num_groups, const uint8_t *group_map);
126
+
127
+/**
128
+ * Determine what group a channel belongs to.
129
+ *
130
+ * @param ctx     psymodel context
131
+ * @param channel channel to locate the group for
132
+ *
133
+ * @return pointer to the FFPsyChannelGroup this channel belongs to
134
+ */
135
+FFPsyChannelGroup *ff_psy_find_group(FFPsyContext *ctx, int channel);
124 136
 
125 137
 /**
126 138
  * Cleanup model context at the end.