Browse code

alacenc: add support for multi-channel encoding

Justin Ruggles authored on 2012/12/05 03:46:20
Showing 6 changed files
... ...
@@ -5,6 +5,7 @@ version <next>:
5 5
 - ashowinfo audio filter
6 6
 - 24-bit FLAC encoding
7 7
 - audio volume filter
8
+- multi-channel ALAC encoding up to 7.1
8 9
 
9 10
 
10 11
 version 9_beta2:
... ...
@@ -85,8 +85,8 @@ OBJS-$(CONFIG_AC3_DECODER)             += ac3dec.o ac3dec_data.o ac3.o kbdwin.o
85 85
 OBJS-$(CONFIG_AC3_ENCODER)             += ac3enc_float.o ac3enc.o ac3tab.o \
86 86
                                           ac3.o kbdwin.o
87 87
 OBJS-$(CONFIG_AC3_FIXED_ENCODER)       += ac3enc_fixed.o ac3enc.o ac3tab.o ac3.o
88
-OBJS-$(CONFIG_ALAC_DECODER)            += alac.o
89
-OBJS-$(CONFIG_ALAC_ENCODER)            += alacenc.o
88
+OBJS-$(CONFIG_ALAC_DECODER)            += alac.o alac_data.o
89
+OBJS-$(CONFIG_ALAC_ENCODER)            += alacenc.o alac_data.o
90 90
 OBJS-$(CONFIG_ALS_DECODER)             += alsdec.o bgmc.o mpeg4audio.o
91 91
 OBJS-$(CONFIG_AMRNB_DECODER)           += amrnbdec.o celp_filters.o   \
92 92
                                           celp_math.o acelp_filters.o \
... ...
@@ -52,9 +52,9 @@
52 52
 #include "internal.h"
53 53
 #include "unary.h"
54 54
 #include "mathops.h"
55
+#include "alac_data.h"
55 56
 
56 57
 #define ALAC_EXTRADATA_SIZE 36
57
-#define MAX_CHANNELS 8
58 58
 
59 59
 typedef struct {
60 60
     AVCodecContext *avctx;
... ...
@@ -76,40 +76,6 @@ typedef struct {
76 76
     int nb_samples;     /**< number of samples in the current frame */
77 77
 } ALACContext;
78 78
 
79
-enum RawDataBlockType {
80
-    /* At the moment, only SCE, CPE, LFE, and END are recognized. */
81
-    TYPE_SCE,
82
-    TYPE_CPE,
83
-    TYPE_CCE,
84
-    TYPE_LFE,
85
-    TYPE_DSE,
86
-    TYPE_PCE,
87
-    TYPE_FIL,
88
-    TYPE_END
89
-};
90
-
91
-static const uint8_t alac_channel_layout_offsets[8][8] = {
92
-    { 0 },
93
-    { 0, 1 },
94
-    { 2, 0, 1 },
95
-    { 2, 0, 1, 3 },
96
-    { 2, 0, 1, 3, 4 },
97
-    { 2, 0, 1, 4, 5, 3 },
98
-    { 2, 0, 1, 4, 5, 6, 3 },
99
-    { 2, 6, 7, 0, 1, 4, 5, 3 }
100
-};
101
-
102
-static const uint16_t alac_channel_layouts[8] = {
103
-    AV_CH_LAYOUT_MONO,
104
-    AV_CH_LAYOUT_STEREO,
105
-    AV_CH_LAYOUT_SURROUND,
106
-    AV_CH_LAYOUT_4POINT0,
107
-    AV_CH_LAYOUT_5POINT0_BACK,
108
-    AV_CH_LAYOUT_5POINT1_BACK,
109
-    AV_CH_LAYOUT_6POINT1_BACK,
110
-    AV_CH_LAYOUT_7POINT1_WIDE_BACK
111
-};
112
-
113 79
 static inline unsigned int decode_scalar(GetBitContext *gb, int k, int bps)
114 80
 {
115 81
     unsigned int x = get_unary_0_9(gb);
... ...
@@ -431,7 +397,7 @@ static int alac_decode_frame(AVCodecContext *avctx, void *data,
431 431
                              int *got_frame_ptr, AVPacket *avpkt)
432 432
 {
433 433
     ALACContext *alac = avctx->priv_data;
434
-    enum RawDataBlockType element;
434
+    enum AlacRawDataBlockType element;
435 435
     int channels;
436 436
     int ch, ret, got_end;
437 437
 
... ...
@@ -458,7 +424,7 @@ static int alac_decode_frame(AVCodecContext *avctx, void *data,
458 458
         }
459 459
 
460 460
         ret = decode_element(avctx, data,
461
-                             alac_channel_layout_offsets[alac->channels - 1][ch],
461
+                             ff_alac_channel_layout_offsets[alac->channels - 1][ch],
462 462
                              channels);
463 463
         if (ret < 0 && get_bits_left(&alac->gb))
464 464
             return ret;
... ...
@@ -581,17 +547,17 @@ static av_cold int alac_decode_init(AVCodecContext * avctx)
581 581
         av_log(avctx, AV_LOG_WARNING, "Invalid channel count\n");
582 582
         alac->channels = avctx->channels;
583 583
     } else {
584
-        if (alac->channels > MAX_CHANNELS)
584
+        if (alac->channels > ALAC_MAX_CHANNELS)
585 585
             alac->channels = avctx->channels;
586 586
         else
587 587
             avctx->channels = alac->channels;
588 588
     }
589
-    if (avctx->channels > MAX_CHANNELS) {
589
+    if (avctx->channels > ALAC_MAX_CHANNELS) {
590 590
         av_log(avctx, AV_LOG_ERROR, "Unsupported channel count: %d\n",
591 591
                avctx->channels);
592 592
         return AVERROR_PATCHWELCOME;
593 593
     }
594
-    avctx->channel_layout = alac_channel_layouts[alac->channels - 1];
594
+    avctx->channel_layout = ff_alac_channel_layouts[alac->channels - 1];
595 595
 
596 596
     if ((ret = allocate_buffers(alac)) < 0) {
597 597
         av_log(avctx, AV_LOG_ERROR, "Error allocating buffers\n");
598 598
new file mode 100644
... ...
@@ -0,0 +1,56 @@
0
+/*
1
+ * ALAC encoder and decoder common data
2
+ *
3
+ * This file is part of Libav.
4
+ *
5
+ * Libav is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * Libav is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with Libav; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#include "libavutil/channel_layout.h"
21
+#include "alac_data.h"
22
+
23
+const uint8_t ff_alac_channel_layout_offsets[ALAC_MAX_CHANNELS][ALAC_MAX_CHANNELS] = {
24
+    { 0 },
25
+    { 0, 1 },
26
+    { 2, 0, 1 },
27
+    { 2, 0, 1, 3 },
28
+    { 2, 0, 1, 3, 4 },
29
+    { 2, 0, 1, 4, 5, 3 },
30
+    { 2, 0, 1, 4, 5, 6, 3 },
31
+    { 2, 6, 7, 0, 1, 4, 5, 3 }
32
+};
33
+
34
+const uint64_t ff_alac_channel_layouts[ALAC_MAX_CHANNELS + 1] = {
35
+    AV_CH_LAYOUT_MONO,
36
+    AV_CH_LAYOUT_STEREO,
37
+    AV_CH_LAYOUT_SURROUND,
38
+    AV_CH_LAYOUT_4POINT0,
39
+    AV_CH_LAYOUT_5POINT0_BACK,
40
+    AV_CH_LAYOUT_5POINT1_BACK,
41
+    AV_CH_LAYOUT_6POINT1_BACK,
42
+    AV_CH_LAYOUT_7POINT1_WIDE_BACK,
43
+    0
44
+};
45
+
46
+const enum AlacRawDataBlockType ff_alac_channel_elements[ALAC_MAX_CHANNELS][5] = {
47
+    { TYPE_SCE,                                         },
48
+    { TYPE_CPE,                                         },
49
+    { TYPE_SCE, TYPE_CPE,                               },
50
+    { TYPE_SCE, TYPE_CPE, TYPE_SCE                      },
51
+    { TYPE_SCE, TYPE_CPE, TYPE_CPE,                     },
52
+    { TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_SCE,           },
53
+    { TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_SCE, TYPE_SCE, },
54
+    { TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_CPE, TYPE_SCE, },
55
+};
0 56
new file mode 100644
... ...
@@ -0,0 +1,46 @@
0
+/*
1
+ * ALAC encoder and decoder common data
2
+ *
3
+ * This file is part of Libav.
4
+ *
5
+ * Libav is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * Libav is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with Libav; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#ifndef AVCODEC_ALAC_DATA_H
21
+#define AVCODEC_ALAC_DATA_H
22
+
23
+#include <stdint.h>
24
+
25
+enum AlacRawDataBlockType {
26
+    /* At the moment, only SCE, CPE, LFE, and END are recognized. */
27
+    TYPE_SCE,
28
+    TYPE_CPE,
29
+    TYPE_CCE,
30
+    TYPE_LFE,
31
+    TYPE_DSE,
32
+    TYPE_PCE,
33
+    TYPE_FIL,
34
+    TYPE_END
35
+};
36
+
37
+#define ALAC_MAX_CHANNELS 8
38
+
39
+extern const uint8_t ff_alac_channel_layout_offsets[ALAC_MAX_CHANNELS][ALAC_MAX_CHANNELS];
40
+
41
+extern const uint64_t ff_alac_channel_layouts[ALAC_MAX_CHANNELS + 1];
42
+
43
+extern const enum AlacRawDataBlockType ff_alac_channel_elements[ALAC_MAX_CHANNELS][5];
44
+
45
+#endif /* AVCODEC_ALAC_DATA_H */
... ...
@@ -25,9 +25,9 @@
25 25
 #include "internal.h"
26 26
 #include "lpc.h"
27 27
 #include "mathops.h"
28
+#include "alac_data.h"
28 29
 
29 30
 #define DEFAULT_FRAME_SIZE        4096
30
-#define MAX_CHANNELS              8
31 31
 #define ALAC_EXTRADATA_SIZE       36
32 32
 #define ALAC_FRAME_HEADER_SIZE    55
33 33
 #define ALAC_FRAME_FOOTER_SIZE    3
... ...
@@ -66,27 +66,27 @@ typedef struct AlacEncodeContext {
66 66
     int max_coded_frame_size;
67 67
     int write_sample_size;
68 68
     int extra_bits;
69
-    int32_t sample_buf[MAX_CHANNELS][DEFAULT_FRAME_SIZE];
69
+    int32_t sample_buf[2][DEFAULT_FRAME_SIZE];
70 70
     int32_t predictor_buf[DEFAULT_FRAME_SIZE];
71 71
     int interlacing_shift;
72 72
     int interlacing_leftweight;
73 73
     PutBitContext pbctx;
74 74
     RiceContext rc;
75
-    AlacLPCContext lpc[MAX_CHANNELS];
75
+    AlacLPCContext lpc[2];
76 76
     LPCContext lpc_ctx;
77 77
     AVCodecContext *avctx;
78 78
 } AlacEncodeContext;
79 79
 
80 80
 
81
-static void init_sample_buffers(AlacEncodeContext *s,
82
-                                uint8_t * const *samples)
81
+static void init_sample_buffers(AlacEncodeContext *s, int channels,
82
+                                uint8_t const *samples[2])
83 83
 {
84 84
     int ch, i;
85 85
     int shift = av_get_bytes_per_sample(s->avctx->sample_fmt) * 8 -
86 86
                 s->avctx->bits_per_raw_sample;
87 87
 
88 88
 #define COPY_SAMPLES(type) do {                             \
89
-        for (ch = 0; ch < s->avctx->channels; ch++) {       \
89
+        for (ch = 0; ch < channels; ch++) {                 \
90 90
             int32_t       *bptr = s->sample_buf[ch];        \
91 91
             const type *sptr = (const type *)samples[ch];   \
92 92
             for (i = 0; i < s->frame_size; i++)             \
... ...
@@ -128,15 +128,18 @@ static void encode_scalar(AlacEncodeContext *s, int x,
128 128
     }
129 129
 }
130 130
 
131
-static void write_frame_header(AlacEncodeContext *s)
131
+static void write_element_header(AlacEncodeContext *s,
132
+                                 enum AlacRawDataBlockType element,
133
+                                 int instance)
132 134
 {
133 135
     int encode_fs = 0;
134 136
 
135 137
     if (s->frame_size < DEFAULT_FRAME_SIZE)
136 138
         encode_fs = 1;
137 139
 
138
-    put_bits(&s->pbctx, 3,  s->avctx->channels-1);  // No. of channels -1
139
-    put_bits(&s->pbctx, 16, 0);                     // Seems to be zero
140
+    put_bits(&s->pbctx, 3,  element);               // element type
141
+    put_bits(&s->pbctx, 4,  instance);              // element instance
142
+    put_bits(&s->pbctx, 12, 0);                     // unused header bits
140 143
     put_bits(&s->pbctx, 1,  encode_fs);             // Sample count is in the header
141 144
     put_bits(&s->pbctx, 2,  s->extra_bits >> 3);    // Extra bytes (for 24-bit)
142 145
     put_bits(&s->pbctx, 1,  s->verbatim);           // Audio block is verbatim
... ...
@@ -355,42 +358,51 @@ static void alac_entropy_coder(AlacEncodeContext *s)
355 355
     }
356 356
 }
357 357
 
358
-static int write_frame(AlacEncodeContext *s, AVPacket *avpkt,
359
-                       uint8_t * const *samples)
358
+static void write_element(AlacEncodeContext *s,
359
+                          enum AlacRawDataBlockType element, int instance,
360
+                          const uint8_t *samples0, const uint8_t *samples1)
360 361
 {
361
-    int i, j;
362
+    uint8_t const *samples[2] = { samples0, samples1 };
363
+    int i, j, channels;
362 364
     int prediction_type = 0;
363 365
     PutBitContext *pb = &s->pbctx;
364 366
 
365
-    init_put_bits(pb, avpkt->data, avpkt->size);
367
+    channels = element == TYPE_CPE ? 2 : 1;
366 368
 
367 369
     if (s->verbatim) {
368
-        write_frame_header(s);
370
+        write_element_header(s, element, instance);
369 371
         /* samples are channel-interleaved in verbatim mode */
370 372
         if (s->avctx->sample_fmt == AV_SAMPLE_FMT_S32P) {
371 373
             int shift = 32 - s->avctx->bits_per_raw_sample;
372
-            int32_t * const *samples_s32 = (int32_t * const *)samples;
374
+            int32_t const *samples_s32[2] = { (const int32_t *)samples0,
375
+                                              (const int32_t *)samples1 };
373 376
             for (i = 0; i < s->frame_size; i++)
374
-                for (j = 0; j < s->avctx->channels; j++)
377
+                for (j = 0; j < channels; j++)
375 378
                     put_sbits(pb, s->avctx->bits_per_raw_sample,
376 379
                               samples_s32[j][i] >> shift);
377 380
         } else {
378
-            int16_t * const *samples_s16 = (int16_t * const *)samples;
381
+            int16_t const *samples_s16[2] = { (const int16_t *)samples0,
382
+                                              (const int16_t *)samples1 };
379 383
             for (i = 0; i < s->frame_size; i++)
380
-                for (j = 0; j < s->avctx->channels; j++)
384
+                for (j = 0; j < channels; j++)
381 385
                     put_sbits(pb, s->avctx->bits_per_raw_sample,
382 386
                               samples_s16[j][i]);
383 387
         }
384 388
     } else {
385
-        init_sample_buffers(s, samples);
386
-        write_frame_header(s);
389
+        s->write_sample_size = s->avctx->bits_per_raw_sample - s->extra_bits +
390
+                               channels - 1;
387 391
 
388
-        if (s->avctx->channels == 2)
392
+        init_sample_buffers(s, channels, samples);
393
+        write_element_header(s, element, instance);
394
+
395
+        if (channels == 2)
389 396
             alac_stereo_decorrelation(s);
397
+        else
398
+            s->interlacing_shift = s->interlacing_leftweight = 0;
390 399
         put_bits(pb, 8, s->interlacing_shift);
391 400
         put_bits(pb, 8, s->interlacing_leftweight);
392 401
 
393
-        for (i = 0; i < s->avctx->channels; i++) {
402
+        for (i = 0; i < channels; i++) {
394 403
             calc_predictor_params(s, i);
395 404
 
396 405
             put_bits(pb, 4, prediction_type);
... ...
@@ -407,7 +419,7 @@ static int write_frame(AlacEncodeContext *s, AVPacket *avpkt,
407 407
         if (s->extra_bits) {
408 408
             uint32_t mask = (1 << s->extra_bits) - 1;
409 409
             for (i = 0; i < s->frame_size; i++) {
410
-                for (j = 0; j < s->avctx->channels; j++) {
410
+                for (j = 0; j < channels; j++) {
411 411
                     put_bits(pb, s->extra_bits, s->sample_buf[j][i] & mask);
412 412
                     s->sample_buf[j][i] >>= s->extra_bits;
413 413
                 }
... ...
@@ -415,8 +427,7 @@ static int write_frame(AlacEncodeContext *s, AVPacket *avpkt,
415 415
         }
416 416
 
417 417
         // apply lpc and entropy coding to audio samples
418
-
419
-        for (i = 0; i < s->avctx->channels; i++) {
418
+        for (i = 0; i < channels; i++) {
420 419
             alac_linear_predictor(s, i);
421 420
 
422 421
             // TODO: determine when this will actually help. for now it's not used.
... ...
@@ -425,12 +436,39 @@ static int write_frame(AlacEncodeContext *s, AVPacket *avpkt,
425 425
                 for (j = s->frame_size - 1; j > 0; j--)
426 426
                     s->predictor_buf[j] -= s->predictor_buf[j - 1];
427 427
             }
428
-
429 428
             alac_entropy_coder(s);
430 429
         }
431 430
     }
432
-    put_bits(pb, 3, 7);
431
+}
432
+
433
+static int write_frame(AlacEncodeContext *s, AVPacket *avpkt,
434
+                       uint8_t * const *samples)
435
+{
436
+    PutBitContext *pb = &s->pbctx;
437
+    const enum AlacRawDataBlockType *ch_elements = ff_alac_channel_elements[s->avctx->channels - 1];
438
+    const uint8_t *ch_map = ff_alac_channel_layout_offsets[s->avctx->channels - 1];
439
+    int ch, element, sce, cpe;
440
+
441
+    init_put_bits(pb, avpkt->data, avpkt->size);
442
+
443
+    ch = element = sce = cpe = 0;
444
+    while (ch < s->avctx->channels) {
445
+        if (ch_elements[element] == TYPE_CPE) {
446
+            write_element(s, TYPE_CPE, cpe, samples[ch_map[ch]],
447
+                          samples[ch_map[ch + 1]]);
448
+            cpe++;
449
+            ch += 2;
450
+        } else {
451
+            write_element(s, TYPE_SCE, sce, samples[ch_map[ch]], NULL);
452
+            sce++;
453
+            ch++;
454
+        }
455
+        element++;
456
+    }
457
+
458
+    put_bits(pb, 3, TYPE_END);
433 459
     flush_put_bits(pb);
460
+
434 461
     return put_bits_count(pb) >> 3;
435 462
 }
436 463
 
... ...
@@ -458,14 +496,6 @@ static av_cold int alac_encode_init(AVCodecContext *avctx)
458 458
 
459 459
     avctx->frame_size = s->frame_size = DEFAULT_FRAME_SIZE;
460 460
 
461
-    /* TODO: Correctly implement multi-channel ALAC.
462
-             It is similar to multi-channel AAC, in that it has a series of
463
-             single-channel (SCE), channel-pair (CPE), and LFE elements. */
464
-    if (avctx->channels > 2) {
465
-        av_log(avctx, AV_LOG_ERROR, "only mono or stereo input is currently supported\n");
466
-        return AVERROR_PATCHWELCOME;
467
-    }
468
-
469 461
     if (avctx->sample_fmt == AV_SAMPLE_FMT_S32P) {
470 462
         if (avctx->bits_per_raw_sample != 24)
471 463
             av_log(avctx, AV_LOG_WARNING, "encoding as 24 bits-per-sample\n");
... ...
@@ -597,8 +627,6 @@ static int alac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
597 597
         s->verbatim   = 1;
598 598
         s->extra_bits = 0;
599 599
     }
600
-    s->write_sample_size = avctx->bits_per_raw_sample - s->extra_bits +
601
-                           avctx->channels - 1;
602 600
 
603 601
     out_bytes = write_frame(s, avpkt, frame->extended_data);
604 602
 
... ...
@@ -606,7 +634,6 @@ static int alac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
606 606
         /* frame too large. use verbatim mode */
607 607
         s->verbatim = 1;
608 608
         s->extra_bits = 0;
609
-        s->write_sample_size = avctx->bits_per_raw_sample + avctx->channels - 1;
610 609
         out_bytes = write_frame(s, avpkt, frame->extended_data);
611 610
     }
612 611
 
... ...
@@ -624,6 +651,7 @@ AVCodec ff_alac_encoder = {
624 624
     .encode2        = alac_encode_frame,
625 625
     .close          = alac_encode_close,
626 626
     .capabilities   = CODEC_CAP_SMALL_LAST_FRAME,
627
+    .channel_layouts = ff_alac_channel_layouts,
627 628
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S32P,
628 629
                                                      AV_SAMPLE_FMT_S16P,
629 630
                                                      AV_SAMPLE_FMT_NONE },