Browse code

mp3enc: write full LAME frame

Most importantly, the full frame contains the encoder delay and replaygain info.

Anton Khirnov authored on 2014/04/12 22:20:57
Showing 2 changed files
... ...
@@ -370,7 +370,8 @@ to provide the pictures as soon as possible to avoid excessive buffering.
370 370
 A Xing/LAME frame right after the ID3v2 header (if present). It is enabled by
371 371
 default, but will be written only if the output is seekable. The
372 372
 @code{write_xing} private option can be used to disable it.  The frame contains
373
-various information that may be useful to the decoder, like the audio duration.
373
+various information that may be useful to the decoder, like the audio duration
374
+or encoder delay.
374 375
 
375 376
 @item
376 377
 A legacy ID3v1 tag at the end of the file (disabled by default). It may be
... ...
@@ -32,6 +32,9 @@
32 32
 #include "libavutil/opt.h"
33 33
 #include "libavutil/dict.h"
34 34
 #include "libavutil/avassert.h"
35
+#include "libavutil/crc.h"
36
+#include "libavutil/mathematics.h"
37
+#include "libavutil/replaygain.h"
35 38
 
36 39
 static int id3v1_set_string(AVFormatContext *s, const char *key,
37 40
                             uint8_t *buf, int buf_size)
... ...
@@ -76,8 +79,8 @@ static int id3v1_create_tag(AVFormatContext *s, uint8_t *buf)
76 76
 
77 77
 #define XING_NUM_BAGS 400
78 78
 #define XING_TOC_SIZE 100
79
-// maximum size of the xing frame: offset/Xing/flags/frames/size/TOC
80
-#define XING_MAX_SIZE (32 + 4 + 4 + 4 + 4 + XING_TOC_SIZE)
79
+// size of the XING/LAME data, starting from the Xing tag
80
+#define XING_SIZE 156
81 81
 
82 82
 typedef struct MP3Context {
83 83
     const AVClass *class;
... ...
@@ -87,7 +90,18 @@ typedef struct MP3Context {
87 87
     int write_xing;
88 88
 
89 89
     /* xing header */
90
-    int64_t xing_offset;
90
+    // a buffer containing the whole XING/LAME frame
91
+    uint8_t *xing_frame;
92
+    int      xing_frame_size;
93
+
94
+    AVCRC    audio_crc;     // CRC of the audio data
95
+    uint32_t audio_size;    // total size of the audio data
96
+
97
+    // offset of the XING/LAME frame in the file
98
+    int64_t  xing_frame_offset;
99
+    // offset of the XING/INFO tag in the frame
100
+    int xing_offset;
101
+
91 102
     int32_t frames;
92 103
     int32_t size;
93 104
     uint32_t want;
... ...
@@ -115,13 +129,15 @@ static void mp3_write_xing(AVFormatContext *s)
115 115
 {
116 116
     MP3Context       *mp3 = s->priv_data;
117 117
     AVCodecContext *codec = s->streams[mp3->audio_stream_idx]->codec;
118
+    AVDictionaryEntry *enc = av_dict_get(s->streams[mp3->audio_stream_idx]->metadata, "encoder", NULL, 0);
119
+    AVIOContext *dyn_ctx;
118 120
     int32_t        header;
119 121
     MPADecodeHeader  mpah;
120 122
     int srate_idx, i, channels;
121 123
     int bitrate_idx;
122 124
     int best_bitrate_idx;
123 125
     int best_bitrate_error = INT_MAX;
124
-    int xing_offset;
126
+    int ret;
125 127
     int ver = 0;
126 128
     int lsf, bytes_needed;
127 129
 
... ...
@@ -161,14 +177,8 @@ static void mp3_write_xing(AVFormatContext *s)
161 161
 
162 162
     lsf = !((header & (1 << 20) && header & (1 << 19)));
163 163
 
164
-    xing_offset  = xing_offtbl[ver != 3][channels == 1];
165
-    bytes_needed = 4              // header
166
-                 + xing_offset
167
-                 + 4              // xing tag
168
-                 + 4              // frames/size/toc flags
169
-                 + 4              // frames
170
-                 + 4              // size
171
-                 + XING_TOC_SIZE; // toc
164
+    mp3->xing_offset = xing_offtbl[ver != 3][channels == 1] + 4;
165
+    bytes_needed     = mp3->xing_offset + XING_SIZE;
172 166
 
173 167
     for (bitrate_idx = 1; bitrate_idx < 15; bitrate_idx++) {
174 168
         int bit_rate = 1000 * avpriv_mpa_bitrate_tab[lsf][3 - 1][bitrate_idx];
... ...
@@ -192,28 +202,72 @@ static void mp3_write_xing(AVFormatContext *s)
192 192
         header &= ~mask;
193 193
     }
194 194
 
195
-    avio_wb32(s->pb, header);
195
+    ret = avio_open_dyn_buf(&dyn_ctx);
196
+    if (ret < 0)
197
+        return;
198
+
199
+    avio_wb32(dyn_ctx, header);
196 200
 
197 201
     avpriv_mpegaudio_decode_header(&mpah, header);
198 202
 
199
-    av_assert0(mpah.frame_size >= XING_MAX_SIZE);
203
+    av_assert0(mpah.frame_size >= bytes_needed);
200 204
 
201
-    ffio_fill(s->pb, 0, xing_offset);
202
-    mp3->xing_offset = avio_tell(s->pb);
203
-    ffio_wfourcc(s->pb, "Xing");
204
-    avio_wb32(s->pb, 0x01 | 0x02 | 0x04);  // frames / size / TOC
205
+    ffio_fill(dyn_ctx, 0, mp3->xing_offset - 4);
206
+    ffio_wfourcc(dyn_ctx, "Xing");
207
+    avio_wb32(dyn_ctx, 0x01 | 0x02 | 0x04 | 0x08);  // frames / size / TOC / vbr scale
205 208
 
206 209
     mp3->size = mpah.frame_size;
207 210
     mp3->want = 1;
208 211
 
209
-    avio_wb32(s->pb, 0);  // frames
210
-    avio_wb32(s->pb, 0);  // size
212
+    avio_wb32(dyn_ctx, 0);  // frames
213
+    avio_wb32(dyn_ctx, 0);  // size
211 214
 
212 215
     // TOC
213 216
     for (i = 0; i < XING_TOC_SIZE; i++)
214
-        avio_w8(s->pb, 255 * i / XING_TOC_SIZE);
217
+        avio_w8(dyn_ctx, 255 * i / XING_TOC_SIZE);
218
+
219
+    // vbr quality
220
+    // we write it, because some (broken) tools always expect it to be present
221
+    avio_wb32(dyn_ctx, 0);
222
+
223
+    // encoder short version string
224
+    if (enc) {
225
+        uint8_t encoder_str[9] = { 0 };
226
+        memcpy(encoder_str, enc->value, FFMIN(strlen(enc->value), sizeof(encoder_str)));
227
+        avio_write(dyn_ctx, encoder_str, sizeof(encoder_str));
228
+    } else
229
+        ffio_fill(dyn_ctx, 0, 9);
230
+
231
+    avio_w8(dyn_ctx, 0);      // tag revision 0 / unknown vbr method
232
+    avio_w8(dyn_ctx, 0);      // unknown lowpass filter value
233
+    ffio_fill(dyn_ctx, 0, 8); // empty replaygain fields
234
+    avio_w8(dyn_ctx, 0);      // unknown encoding flags
235
+    avio_w8(dyn_ctx, 0);      // unknown abr/minimal bitrate
236
+
237
+    // encoder delay
238
+    if (codec->initial_padding >= 1 << 12) {
239
+        av_log(s, AV_LOG_WARNING, "Too many samples of initial padding.\n");
240
+        avio_wb24(dyn_ctx, 0);
241
+    } else {
242
+        avio_wb24(dyn_ctx, codec->initial_padding << 12);
243
+    }
244
+
245
+    avio_w8(dyn_ctx,   0); // misc
246
+    avio_w8(dyn_ctx,   0); // mp3gain
247
+    avio_wb16(dyn_ctx, 0); // preset
248
+
249
+    // audio length and CRCs (will be updated later)
250
+    avio_wb32(dyn_ctx, 0); // music length
251
+    avio_wb16(dyn_ctx, 0); // music crc
252
+    avio_wb16(dyn_ctx, 0); // tag crc
215 253
 
216
-    ffio_fill(s->pb, 0, mpah.frame_size - bytes_needed);
254
+    ffio_fill(dyn_ctx, 0, mpah.frame_size - bytes_needed);
255
+
256
+    mp3->xing_frame_size   = avio_close_dyn_buf(dyn_ctx, &mp3->xing_frame);
257
+    mp3->xing_frame_offset = avio_tell(s->pb);
258
+    avio_write(s->pb, mp3->xing_frame, mp3->xing_frame_size);
259
+
260
+    mp3->audio_size = mp3->xing_frame_size;
217 261
 }
218 262
 
219 263
 /*
... ...
@@ -264,6 +318,12 @@ static int mp3_write_audio_packet(AVFormatContext *s, AVPacket *pkt)
264 264
         }
265 265
 
266 266
         mp3_xing_add_frame(mp3, pkt);
267
+
268
+        if (mp3->xing_offset) {
269
+            mp3->audio_size += pkt->size;
270
+            mp3->audio_crc   = av_crc(av_crc_get_table(AV_CRC_16_ANSI_LE),
271
+                                      mp3->audio_crc, pkt->data, pkt->size);
272
+        }
267 273
     }
268 274
 
269 275
     return ff_raw_write_packet(s, pkt);
... ...
@@ -292,26 +352,58 @@ static int mp3_queue_flush(AVFormatContext *s)
292 292
 static void mp3_update_xing(AVFormatContext *s)
293 293
 {
294 294
     MP3Context  *mp3 = s->priv_data;
295
-    int i;
295
+    AVReplayGain *rg;
296
+    uint16_t tag_crc;
297
+    uint8_t *toc;
298
+    int i, rg_size;
296 299
 
297 300
     /* replace "Xing" identification string with "Info" for CBR files. */
298
-    if (!mp3->has_variable_bitrate) {
299
-        avio_seek(s->pb, mp3->xing_offset, SEEK_SET);
300
-        ffio_wfourcc(s->pb, "Info");
301
-    }
302
-
303
-    avio_seek(s->pb, mp3->xing_offset + 8, SEEK_SET);
304
-    avio_wb32(s->pb, mp3->frames);
305
-    avio_wb32(s->pb, mp3->size);
301
+    if (!mp3->has_variable_bitrate)
302
+        AV_WL32(mp3->xing_frame + mp3->xing_offset, MKTAG('I', 'n', 'f', 'o'));
306 303
 
307
-    avio_w8(s->pb, 0);  // first toc entry has to be zero.
304
+    AV_WB32(mp3->xing_frame + mp3->xing_offset + 8,  mp3->frames);
305
+    AV_WB32(mp3->xing_frame + mp3->xing_offset + 12, mp3->size);
308 306
 
307
+    toc    = mp3->xing_frame + mp3->xing_offset + 16;
308
+    toc[0] = 0;  // first toc entry has to be zero.
309 309
     for (i = 1; i < XING_TOC_SIZE; ++i) {
310 310
         int j = i * mp3->pos / XING_TOC_SIZE;
311 311
         int seek_point = 256LL * mp3->bag[j] / mp3->size;
312
-        avio_w8(s->pb, FFMIN(seek_point, 255));
312
+        toc[i] = FFMIN(seek_point, 255);
313
+    }
314
+
315
+    /* write replaygain */
316
+    rg = (AVReplayGain*)av_stream_get_side_data(s->streams[0], AV_PKT_DATA_REPLAYGAIN,
317
+                                                &rg_size);
318
+    if (rg && rg_size >= sizeof(*rg)) {
319
+        uint16_t val;
320
+
321
+        AV_WB32(mp3->xing_frame + mp3->xing_offset + 131,
322
+                av_rescale(rg->track_peak, 1 << 23, 100000));
323
+
324
+        if (rg->track_gain != INT32_MIN) {
325
+            val  = FFABS(rg->track_gain / 10000) & ((1 << 9) - 1);
326
+            val |= (rg->track_gain < 0) << 9;
327
+            val |= 1 << 13;
328
+            AV_WB16(mp3->xing_frame + mp3->xing_offset + 135, val);
329
+        }
330
+
331
+        if (rg->album_gain != INT32_MIN) {
332
+            val  = FFABS(rg->album_gain / 10000) & ((1 << 9) - 1);
333
+            val |= (rg->album_gain < 0) << 9;
334
+            val |= 1 << 14;
335
+            AV_WB16(mp3->xing_frame + mp3->xing_offset + 137, val);
336
+        }
313 337
     }
314 338
 
339
+    AV_WB32(mp3->xing_frame + mp3->xing_offset + XING_SIZE - 8, mp3->audio_size);
340
+    AV_WB16(mp3->xing_frame + mp3->xing_offset + XING_SIZE - 4, mp3->audio_crc);
341
+
342
+    tag_crc = av_crc(av_crc_get_table(AV_CRC_16_ANSI_LE), 0, mp3->xing_frame, 190);
343
+    AV_WB16(mp3->xing_frame + mp3->xing_offset + XING_SIZE - 2, tag_crc);
344
+
345
+    avio_seek(s->pb,  mp3->xing_frame_offset, SEEK_SET);
346
+    avio_write(s->pb, mp3->xing_frame, mp3->xing_frame_size);
315 347
     avio_seek(s->pb, 0, SEEK_END);
316 348
 }
317 349
 
... ...
@@ -334,6 +426,8 @@ static int mp3_write_trailer(struct AVFormatContext *s)
334 334
     if (mp3->xing_offset)
335 335
         mp3_update_xing(s);
336 336
 
337
+    av_freep(&mp3->xing_frame);
338
+
337 339
     return 0;
338 340
 }
339 341