| ... | ... |
@@ -345,7 +345,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce |
| 345 | 345 |
float cost_stay_here, cost_get_here; |
| 346 | 346 |
float rd = 0.0f; |
| 347 | 347 |
for (w = 0; w < group_len; w++) {
|
| 348 |
- FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(win+w)*16+swb]; |
|
| 348 |
+ FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(win+w)*16+swb]; |
|
| 349 | 349 |
rd += quantize_band_cost(s, sce->coeffs + start + w*128, |
| 350 | 350 |
s->scoefs + start + w*128, size, |
| 351 | 351 |
sce->sf_idx[(win+w)*16+swb], cb, |
| ... | ... |
@@ -625,7 +625,7 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s, |
| 625 | 625 |
qmin = INT_MAX; |
| 626 | 626 |
qmax = 0.0f; |
| 627 | 627 |
for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
|
| 628 |
- FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g]; |
|
| 628 |
+ FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; |
|
| 629 | 629 |
if (band->energy <= band->threshold || band->threshold == 0.0f) {
|
| 630 | 630 |
sce->zeroes[(w+w2)*16+g] = 1; |
| 631 | 631 |
continue; |
| ... | ... |
@@ -654,7 +654,7 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s, |
| 654 | 654 |
float dist = 0; |
| 655 | 655 |
int cb = find_min_book(maxval, sce->sf_idx[w*16+g]); |
| 656 | 656 |
for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
|
| 657 |
- FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g]; |
|
| 657 |
+ FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; |
|
| 658 | 658 |
dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g], |
| 659 | 659 |
q + q0, cb, lambda / band->threshold, INFINITY, NULL); |
| 660 | 660 |
} |
| ... | ... |
@@ -727,7 +727,7 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, |
| 727 | 727 |
int nz = 0; |
| 728 | 728 |
float uplim = 0.0f; |
| 729 | 729 |
for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
|
| 730 |
- FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g]; |
|
| 730 |
+ FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; |
|
| 731 | 731 |
uplim += band->threshold; |
| 732 | 732 |
if (band->energy <= band->threshold || band->threshold == 0.0f) {
|
| 733 | 733 |
sce->zeroes[(w+w2)*16+g] = 1; |
| ... | ... |
@@ -1027,7 +1027,7 @@ static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s, |
| 1027 | 1027 |
for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
|
| 1028 | 1028 |
for (g = 0; g < sce->ics.num_swb; g++) {
|
| 1029 | 1029 |
for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
|
| 1030 |
- FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g]; |
|
| 1030 |
+ FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; |
|
| 1031 | 1031 |
if (band->energy <= band->threshold) {
|
| 1032 | 1032 |
sce->sf_idx[(w+w2)*16+g] = 218; |
| 1033 | 1033 |
sce->zeroes[(w+w2)*16+g] = 1; |
| ... | ... |
@@ -1065,8 +1065,8 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe, |
| 1065 | 1065 |
if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
|
| 1066 | 1066 |
float dist1 = 0.0f, dist2 = 0.0f; |
| 1067 | 1067 |
for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
|
| 1068 |
- FFPsyBand *band0 = &s->psy.psy_bands[(s->cur_channel+0)*PSY_MAX_BANDS+(w+w2)*16+g]; |
|
| 1069 |
- FFPsyBand *band1 = &s->psy.psy_bands[(s->cur_channel+1)*PSY_MAX_BANDS+(w+w2)*16+g]; |
|
| 1068 |
+ FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g]; |
|
| 1069 |
+ FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g]; |
|
| 1070 | 1070 |
float minthr = FFMIN(band0->threshold, band1->threshold); |
| 1071 | 1071 |
float maxthr = FFMAX(band0->threshold, band1->threshold); |
| 1072 | 1072 |
for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
|
| ... | ... |
@@ -210,7 +210,7 @@ static av_cold int aac_encode_init(AVCodecContext *avctx) |
| 210 | 210 |
sizes[1] = swb_size_128[i]; |
| 211 | 211 |
lengths[0] = ff_aac_num_swb_1024[i]; |
| 212 | 212 |
lengths[1] = ff_aac_num_swb_128[i]; |
| 213 |
- ff_psy_init(&s->psy, avctx, 2, sizes, lengths); |
|
| 213 |
+ ff_psy_init(&s->psy, avctx, 2, sizes, lengths, s->chan_map[0], &s->chan_map[1]); |
|
| 214 | 214 |
s->psypp = ff_psy_preprocess_init(avctx); |
| 215 | 215 |
s->coder = &ff_aac_coders[2]; |
| 216 | 216 |
|
| ... | ... |
@@ -570,8 +570,8 @@ static int aac_encode_frame(AVCodecContext *avctx, |
| 570 | 570 |
put_bits(&s->pb, 3, tag); |
| 571 | 571 |
put_bits(&s->pb, 4, chan_el_counter[tag]++); |
| 572 | 572 |
for (ch = 0; ch < chans; ch++) {
|
| 573 |
- s->cur_channel = start_ch + ch; |
|
| 574 |
- s->psy.model->analyze(&s->psy, s->cur_channel, cpe->ch[ch].coeffs, &wi[ch]); |
|
| 573 |
+ s->cur_channel = start_ch * 2 + ch; |
|
| 574 |
+ s->psy.model->analyze(&s->psy, start_ch + ch, cpe->ch[ch].coeffs, &wi[ch]); |
|
| 575 | 575 |
s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda); |
| 576 | 576 |
} |
| 577 | 577 |
cpe->common_window = 0; |
| ... | ... |
@@ -587,7 +587,7 @@ static int aac_encode_frame(AVCodecContext *avctx, |
| 587 | 587 |
} |
| 588 | 588 |
} |
| 589 | 589 |
} |
| 590 |
- s->cur_channel = start_ch; |
|
| 590 |
+ s->cur_channel = start_ch * 2; |
|
| 591 | 591 |
if (s->options.stereo_mode && cpe->common_window) {
|
| 592 | 592 |
if (s->options.stereo_mode > 0) {
|
| 593 | 593 |
IndividualChannelStream *ics = &cpe->ch[0].ics; |
| ... | ... |
@@ -627,7 +627,7 @@ static void psy_3gpp_analyze(FFPsyContext *ctx, int channel, |
| 627 | 627 |
} |
| 628 | 628 |
|
| 629 | 629 |
/* 5.6.1.3.2 "Calculation of the desired perceptual entropy" */ |
| 630 |
- ctx->pe[channel] = pe; |
|
| 630 |
+ ctx->ch[channel].entropy = pe; |
|
| 631 | 631 |
desired_bits = calc_bit_demand(pctx, pe, ctx->bitres.bits, ctx->bitres.size, wi->num_windows == 8); |
| 632 | 632 |
desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits); |
| 633 | 633 |
/* NOTE: PE correction is kept simple. During initial testing it had very |
| ... | ... |
@@ -731,7 +731,7 @@ static void psy_3gpp_analyze(FFPsyContext *ctx, int channel, |
| 731 | 731 |
for (w = 0; w < wi->num_windows*16; w += 16) {
|
| 732 | 732 |
for (g = 0; g < num_bands; g++) {
|
| 733 | 733 |
AacPsyBand *band = &pch->band[w+g]; |
| 734 |
- FFPsyBand *psy_band = &ctx->psy_bands[channel*PSY_MAX_BANDS+w+g]; |
|
| 734 |
+ FFPsyBand *psy_band = &ctx->ch[channel].psy_bands[w+g]; |
|
| 735 | 735 |
|
| 736 | 736 |
psy_band->threshold = band->thr; |
| 737 | 737 |
psy_band->energy = band->energy; |
| ... | ... |
@@ -921,5 +921,6 @@ const FFPsyModel ff_aac_psy_model = |
| 921 | 921 |
.init = psy_3gpp_init, |
| 922 | 922 |
.window = psy_lame_window, |
| 923 | 923 |
.analyze = psy_3gpp_analyze, |
| 924 |
+ .analyze_group = NULL, |
|
| 924 | 925 |
.end = psy_3gpp_end, |
| 925 | 926 |
}; |
| ... | ... |
@@ -25,16 +25,31 @@ |
| 25 | 25 |
|
| 26 | 26 |
extern const FFPsyModel ff_aac_psy_model; |
| 27 | 27 |
|
| 28 |
-av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, |
|
| 29 |
- int num_lens, |
|
| 30 |
- const uint8_t **bands, const int* num_bands) |
|
| 28 |
+av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens, |
|
| 29 |
+ const uint8_t **bands, const int* num_bands, |
|
| 30 |
+ int num_groups, const uint8_t *group_map) |
|
| 31 | 31 |
{
|
| 32 |
+ int i, j, k = 0; |
|
| 33 |
+ |
|
| 32 | 34 |
ctx->avctx = avctx; |
| 33 |
- ctx->psy_bands = av_mallocz(sizeof(FFPsyBand) * PSY_MAX_BANDS * avctx->channels); |
|
| 35 |
+ ctx->ch = av_mallocz(sizeof(ctx->ch[0]) * avctx->channels * 2); |
|
| 36 |
+ ctx->group = av_mallocz(sizeof(ctx->group[0]) * num_groups); |
|
| 34 | 37 |
ctx->bands = av_malloc (sizeof(ctx->bands[0]) * num_lens); |
| 35 | 38 |
ctx->num_bands = av_malloc (sizeof(ctx->num_bands[0]) * num_lens); |
| 36 | 39 |
memcpy(ctx->bands, bands, sizeof(ctx->bands[0]) * num_lens); |
| 37 | 40 |
memcpy(ctx->num_bands, num_bands, sizeof(ctx->num_bands[0]) * num_lens); |
| 41 |
+ |
|
| 42 |
+ /* assign channels to groups (with virtual channels for coupling) */ |
|
| 43 |
+ for (i = 0; i < num_groups; i++) {
|
|
| 44 |
+ /* NOTE: Add 1 to handle the AAC chan_config without modification. |
|
| 45 |
+ * This has the side effect of allowing an array of 0s to map |
|
| 46 |
+ * to one channel per group. |
|
| 47 |
+ */ |
|
| 48 |
+ ctx->group[i].num_ch = group_map[i] + 1; |
|
| 49 |
+ for (j = 0; j < ctx->group[i].num_ch * 2; j++) |
|
| 50 |
+ ctx->group[i].ch[j] = &ctx->ch[k++]; |
|
| 51 |
+ } |
|
| 52 |
+ |
|
| 38 | 53 |
switch (ctx->avctx->codec_id) {
|
| 39 | 54 |
case CODEC_ID_AAC: |
| 40 | 55 |
ctx->model = &ff_aac_psy_model; |
| ... | ... |
@@ -45,13 +60,24 @@ av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, |
| 45 | 45 |
return 0; |
| 46 | 46 |
} |
| 47 | 47 |
|
| 48 |
+FFPsyChannelGroup *ff_psy_find_group(FFPsyContext *ctx, int channel) |
|
| 49 |
+{
|
|
| 50 |
+ int i = 0, ch = 0; |
|
| 51 |
+ |
|
| 52 |
+ while (ch <= channel) |
|
| 53 |
+ ch += ctx->group[i++].num_ch; |
|
| 54 |
+ |
|
| 55 |
+ return &ctx->group[i-1]; |
|
| 56 |
+} |
|
| 57 |
+ |
|
| 48 | 58 |
av_cold void ff_psy_end(FFPsyContext *ctx) |
| 49 | 59 |
{
|
| 50 | 60 |
if (ctx->model->end) |
| 51 | 61 |
ctx->model->end(ctx); |
| 52 | 62 |
av_freep(&ctx->bands); |
| 53 | 63 |
av_freep(&ctx->num_bands); |
| 54 |
- av_freep(&ctx->psy_bands); |
|
| 64 |
+ av_freep(&ctx->group); |
|
| 65 |
+ av_freep(&ctx->ch); |
|
| 55 | 66 |
} |
| 56 | 67 |
|
| 57 | 68 |
typedef struct FFPsyPreprocessContext{
|
| ... | ... |
@@ -41,6 +41,23 @@ typedef struct FFPsyBand {
|
| 41 | 41 |
} FFPsyBand; |
| 42 | 42 |
|
| 43 | 43 |
/** |
| 44 |
+ * single channel psychoacoustic information |
|
| 45 |
+ */ |
|
| 46 |
+typedef struct FFPsyChannel {
|
|
| 47 |
+ FFPsyBand psy_bands[PSY_MAX_BANDS]; ///< channel bands information |
|
| 48 |
+ float entropy; ///< total PE for this channel |
|
| 49 |
+} FFPsyChannel; |
|
| 50 |
+ |
|
| 51 |
+/** |
|
| 52 |
+ * psychoacoustic information for an arbitrary group of channels |
|
| 53 |
+ */ |
|
| 54 |
+typedef struct FFPsyChannelGroup {
|
|
| 55 |
+ FFPsyChannel *ch[PSY_MAX_CHANS]; ///< pointers to the individual channels in the group |
|
| 56 |
+ uint8_t num_ch; ///< number of channels in this group |
|
| 57 |
+ uint8_t coupling[PSY_MAX_BANDS]; ///< allow coupling for this band in the group |
|
| 58 |
+} FFPsyChannelGroup; |
|
| 59 |
+ |
|
| 60 |
+/** |
|
| 44 | 61 |
* windowing related information |
| 45 | 62 |
*/ |
| 46 | 63 |
typedef struct FFPsyWindowInfo {
|
| ... | ... |
@@ -58,14 +75,14 @@ typedef struct FFPsyContext {
|
| 58 | 58 |
AVCodecContext *avctx; ///< encoder context |
| 59 | 59 |
const struct FFPsyModel *model; ///< encoder-specific model functions |
| 60 | 60 |
|
| 61 |
- FFPsyBand *psy_bands; ///< frame bands information |
|
| 61 |
+ FFPsyChannel *ch; ///< single channel information |
|
| 62 |
+ FFPsyChannelGroup *group; ///< channel group information |
|
| 63 |
+ int num_groups; ///< number of channel groups |
|
| 62 | 64 |
|
| 63 | 65 |
uint8_t **bands; ///< scalefactor band sizes for possible frame sizes |
| 64 | 66 |
int *num_bands; ///< number of scalefactor bands for possible frame sizes |
| 65 | 67 |
int num_lens; ///< number of scalefactor band sets |
| 66 | 68 |
|
| 67 |
- float pe[PSY_MAX_CHANS]; ///< total PE for each channel in the frame |
|
| 68 |
- |
|
| 69 | 69 |
struct {
|
| 70 | 70 |
int size; ///< size of the bit reservoir in bits |
| 71 | 71 |
int bits; ///< number of bits used in the bit reservoir |
| ... | ... |
@@ -95,7 +112,7 @@ typedef struct FFPsyModel {
|
| 95 | 95 |
FFPsyWindowInfo (*window)(FFPsyContext *ctx, const int16_t *audio, const int16_t *la, int channel, int prev_type); |
| 96 | 96 |
|
| 97 | 97 |
/** |
| 98 |
- * Perform psychoacoustic analysis and set band info (threshold, energy). |
|
| 98 |
+ * Perform psychoacoustic analysis and set band info (threshold, energy) for a single channel. |
|
| 99 | 99 |
* |
| 100 | 100 |
* @param ctx model context |
| 101 | 101 |
* @param channel audio channel number |
| ... | ... |
@@ -104,6 +121,16 @@ typedef struct FFPsyModel {
|
| 104 | 104 |
*/ |
| 105 | 105 |
void (*analyze)(FFPsyContext *ctx, int channel, const float *coeffs, const FFPsyWindowInfo *wi); |
| 106 | 106 |
|
| 107 |
+ /** |
|
| 108 |
+ * Perform psychoacoustic analysis and set band info (threshold, energy) for a group of channels. |
|
| 109 |
+ * |
|
| 110 |
+ * @param ctx model context |
|
| 111 |
+ * @param channel channel number of the first channel in the group to perform analysis on |
|
| 112 |
+ * @param coeffs array of pointers to the transformed coefficients |
|
| 113 |
+ * @param wi window information for the channels in the group |
|
| 114 |
+ */ |
|
| 115 |
+ void (*analyze_group)(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi); |
|
| 116 |
+ |
|
| 107 | 117 |
void (*end) (FFPsyContext *apc); |
| 108 | 118 |
} FFPsyModel; |
| 109 | 119 |
|
| ... | ... |
@@ -115,12 +142,24 @@ typedef struct FFPsyModel {
|
| 115 | 115 |
* @param num_lens number of possible frame lengths |
| 116 | 116 |
* @param bands scalefactor band lengths for all frame lengths |
| 117 | 117 |
* @param num_bands number of scalefactor bands for all frame lengths |
| 118 |
+ * @param num_groups number of channel groups |
|
| 119 |
+ * @param group_map array holding, for each group, the number of channels in that group minus 1 |
|
| 118 | 120 |
* |
| 119 | 121 |
* @return zero if successful, a negative value if not |
| 120 | 122 |
*/ |
| 121 |
-av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, |
|
| 122 |
- int num_lens, |
|
| 123 |
- const uint8_t **bands, const int* num_bands); |
|
| 123 |
+av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens, |
|
| 124 |
+ const uint8_t **bands, const int* num_bands, |
|
| 125 |
+ int num_groups, const uint8_t *group_map); |
|
| 126 |
+ |
|
| 127 |
+/** |
|
| 128 |
+ * Determine what group a channel belongs to. |
|
| 129 |
+ * |
|
| 130 |
+ * @param ctx psymodel context |
|
| 131 |
+ * @param channel channel to locate the group for |
|
| 132 |
+ * |
|
| 133 |
+ * @return pointer to the FFPsyChannelGroup this channel belongs to |
|
| 134 |
+ */ |
|
| 135 |
+FFPsyChannelGroup *ff_psy_find_group(FFPsyContext *ctx, int channel); |
|
| 124 | 136 |
|
| 125 | 137 |
/** |
| 126 | 138 |
* Cleanup model context at the end. |