Most importantly, it contains the encoder delay and replaygain info.
Anton Khirnov authored on 2014/04/12 22:20:57... | ... |
@@ -370,7 +370,8 @@ to provide the pictures as soon as possible to avoid excessive buffering. |
370 | 370 |
A Xing/LAME frame right after the ID3v2 header (if present). It is enabled by |
371 | 371 |
default, but will be written only if the output is seekable. The |
372 | 372 |
@code{write_xing} private option can be used to disable it. The frame contains |
373 |
-various information that may be useful to the decoder, like the audio duration. |
|
373 |
+various information that may be useful to the decoder, such as the audio duration |
|
374 |
+or the encoder delay. |
|
374 | 375 |
|
375 | 376 |
@item |
376 | 377 |
A legacy ID3v1 tag at the end of the file (disabled by default). It may be |
... | ... |
@@ -32,6 +32,9 @@ |
32 | 32 |
#include "libavutil/opt.h" |
33 | 33 |
#include "libavutil/dict.h" |
34 | 34 |
#include "libavutil/avassert.h" |
35 |
+#include "libavutil/crc.h" |
|
36 |
+#include "libavutil/mathematics.h" |
|
37 |
+#include "libavutil/replaygain.h" |
|
35 | 38 |
|
36 | 39 |
static int id3v1_set_string(AVFormatContext *s, const char *key, |
37 | 40 |
uint8_t *buf, int buf_size) |
... | ... |
@@ -76,8 +79,8 @@ static int id3v1_create_tag(AVFormatContext *s, uint8_t *buf) |
76 | 76 |
|
77 | 77 |
#define XING_NUM_BAGS 400 |
78 | 78 |
#define XING_TOC_SIZE 100 |
79 |
-// maximum size of the xing frame: offset/Xing/flags/frames/size/TOC |
|
80 |
-#define XING_MAX_SIZE (32 + 4 + 4 + 4 + 4 + XING_TOC_SIZE) |
|
79 |
+// size of the XING/LAME data, starting from the Xing tag |
|
80 |
+#define XING_SIZE 156 |
|
81 | 81 |
|
82 | 82 |
typedef struct MP3Context { |
83 | 83 |
const AVClass *class; |
... | ... |
@@ -87,7 +90,18 @@ typedef struct MP3Context { |
87 | 87 |
int write_xing; |
88 | 88 |
|
89 | 89 |
/* xing header */ |
90 |
- int64_t xing_offset; |
|
90 |
+ // a buffer containing the whole XING/LAME frame |
|
91 |
+ uint8_t *xing_frame; |
|
92 |
+ int xing_frame_size; |
|
93 |
+ |
|
94 |
+ AVCRC audio_crc; // CRC of the audio data |
|
95 |
+ uint32_t audio_size; // total size of the audio data |
|
96 |
+ |
|
97 |
+ // offset of the XING/LAME frame in the file |
|
98 |
+ int64_t xing_frame_offset; |
|
99 |
+ // offset of the XING/INFO tag in the frame |
|
100 |
+ int xing_offset; |
|
101 |
+ |
|
91 | 102 |
int32_t frames; |
92 | 103 |
int32_t size; |
93 | 104 |
uint32_t want; |
... | ... |
@@ -115,13 +129,15 @@ static void mp3_write_xing(AVFormatContext *s) |
115 | 115 |
{ |
116 | 116 |
MP3Context *mp3 = s->priv_data; |
117 | 117 |
AVCodecContext *codec = s->streams[mp3->audio_stream_idx]->codec; |
118 |
+ AVDictionaryEntry *enc = av_dict_get(s->streams[mp3->audio_stream_idx]->metadata, "encoder", NULL, 0); |
|
119 |
+ AVIOContext *dyn_ctx; |
|
118 | 120 |
int32_t header; |
119 | 121 |
MPADecodeHeader mpah; |
120 | 122 |
int srate_idx, i, channels; |
121 | 123 |
int bitrate_idx; |
122 | 124 |
int best_bitrate_idx; |
123 | 125 |
int best_bitrate_error = INT_MAX; |
124 |
- int xing_offset; |
|
126 |
+ int ret; |
|
125 | 127 |
int ver = 0; |
126 | 128 |
int lsf, bytes_needed; |
127 | 129 |
|
... | ... |
@@ -161,14 +177,8 @@ static void mp3_write_xing(AVFormatContext *s) |
161 | 161 |
|
162 | 162 |
lsf = !((header & (1 << 20) && header & (1 << 19))); |
163 | 163 |
|
164 |
- xing_offset = xing_offtbl[ver != 3][channels == 1]; |
|
165 |
- bytes_needed = 4 // header |
|
166 |
- + xing_offset |
|
167 |
- + 4 // xing tag |
|
168 |
- + 4 // frames/size/toc flags |
|
169 |
- + 4 // frames |
|
170 |
- + 4 // size |
|
171 |
- + XING_TOC_SIZE; // toc |
|
164 |
+ mp3->xing_offset = xing_offtbl[ver != 3][channels == 1] + 4; |
|
165 |
+ bytes_needed = mp3->xing_offset + XING_SIZE; |
|
172 | 166 |
|
173 | 167 |
for (bitrate_idx = 1; bitrate_idx < 15; bitrate_idx++) { |
174 | 168 |
int bit_rate = 1000 * avpriv_mpa_bitrate_tab[lsf][3 - 1][bitrate_idx]; |
... | ... |
@@ -192,28 +202,72 @@ static void mp3_write_xing(AVFormatContext *s) |
192 | 192 |
header &= ~mask; |
193 | 193 |
} |
194 | 194 |
|
195 |
- avio_wb32(s->pb, header); |
|
195 |
+ ret = avio_open_dyn_buf(&dyn_ctx); |
|
196 |
+ if (ret < 0) |
|
197 |
+ return; |
|
198 |
+ |
|
199 |
+ avio_wb32(dyn_ctx, header); |
|
196 | 200 |
|
197 | 201 |
avpriv_mpegaudio_decode_header(&mpah, header); |
198 | 202 |
|
199 |
- av_assert0(mpah.frame_size >= XING_MAX_SIZE); |
|
203 |
+ av_assert0(mpah.frame_size >= bytes_needed); |
|
200 | 204 |
|
201 |
- ffio_fill(s->pb, 0, xing_offset); |
|
202 |
- mp3->xing_offset = avio_tell(s->pb); |
|
203 |
- ffio_wfourcc(s->pb, "Xing"); |
|
204 |
- avio_wb32(s->pb, 0x01 | 0x02 | 0x04); // frames / size / TOC |
|
205 |
+ ffio_fill(dyn_ctx, 0, mp3->xing_offset - 4); |
|
206 |
+ ffio_wfourcc(dyn_ctx, "Xing"); |
|
207 |
+ avio_wb32(dyn_ctx, 0x01 | 0x02 | 0x04 | 0x08); // frames / size / TOC / vbr scale |
|
205 | 208 |
|
206 | 209 |
mp3->size = mpah.frame_size; |
207 | 210 |
mp3->want = 1; |
208 | 211 |
|
209 |
- avio_wb32(s->pb, 0); // frames |
|
210 |
- avio_wb32(s->pb, 0); // size |
|
212 |
+ avio_wb32(dyn_ctx, 0); // frames |
|
213 |
+ avio_wb32(dyn_ctx, 0); // size |
|
211 | 214 |
|
212 | 215 |
// TOC |
213 | 216 |
for (i = 0; i < XING_TOC_SIZE; i++) |
214 |
- avio_w8(s->pb, 255 * i / XING_TOC_SIZE); |
|
217 |
+ avio_w8(dyn_ctx, 255 * i / XING_TOC_SIZE); |
|
218 |
+ |
|
219 |
+ // VBR quality (the "vbr scale" field announced by flag 0x08 above) |
|
220 |
+ // we write it because some (broken) tools always expect it to be present |
|
221 |
+ avio_wb32(dyn_ctx, 0); |
|
222 |
+ |
|
223 |
+ // encoder short version string |
|
224 |
+ if (enc) { |
|
225 |
+ uint8_t encoder_str[9] = { 0 }; |
|
226 |
+ memcpy(encoder_str, enc->value, FFMIN(strlen(enc->value), sizeof(encoder_str))); |
|
227 |
+ avio_write(dyn_ctx, encoder_str, sizeof(encoder_str)); |
|
228 |
+ } else |
|
229 |
+ ffio_fill(dyn_ctx, 0, 9); |
|
230 |
+ |
|
231 |
+ avio_w8(dyn_ctx, 0); // tag revision 0 / unknown vbr method |
|
232 |
+ avio_w8(dyn_ctx, 0); // unknown lowpass filter value |
|
233 |
+ ffio_fill(dyn_ctx, 0, 8); // empty replaygain fields |
|
234 |
+ avio_w8(dyn_ctx, 0); // unknown encoding flags |
|
235 |
+ avio_w8(dyn_ctx, 0); // unknown abr/minimal bitrate |
|
236 |
+ |
|
237 |
+ // encoder delay |
|
238 |
+ if (codec->initial_padding >= 1 << 12) { |
|
239 |
+ av_log(s, AV_LOG_WARNING, "Too many samples of initial padding.\n"); |
|
240 |
+ avio_wb24(dyn_ctx, 0); |
|
241 |
+ } else { |
|
242 |
+ avio_wb24(dyn_ctx, codec->initial_padding << 12); |
|
243 |
+ } |
|
244 |
+ |
|
245 |
+ avio_w8(dyn_ctx, 0); // misc |
|
246 |
+ avio_w8(dyn_ctx, 0); // mp3gain |
|
247 |
+ avio_wb16(dyn_ctx, 0); // preset |
|
248 |
+ |
|
249 |
+ // audio length and CRCs (will be updated later) |
|
250 |
+ avio_wb32(dyn_ctx, 0); // music length |
|
251 |
+ avio_wb16(dyn_ctx, 0); // music crc |
|
252 |
+ avio_wb16(dyn_ctx, 0); // tag crc |
|
215 | 253 |
|
216 |
- ffio_fill(s->pb, 0, mpah.frame_size - bytes_needed); |
|
254 |
+ ffio_fill(dyn_ctx, 0, mpah.frame_size - bytes_needed); |
|
255 |
+ |
|
256 |
+ mp3->xing_frame_size = avio_close_dyn_buf(dyn_ctx, &mp3->xing_frame); |
|
257 |
+ mp3->xing_frame_offset = avio_tell(s->pb); |
|
258 |
+ avio_write(s->pb, mp3->xing_frame, mp3->xing_frame_size); |
|
259 |
+ |
|
260 |
+ mp3->audio_size = mp3->xing_frame_size; |
|
217 | 261 |
} |
218 | 262 |
|
219 | 263 |
/* |
... | ... |
@@ -264,6 +318,12 @@ static int mp3_write_audio_packet(AVFormatContext *s, AVPacket *pkt) |
264 | 264 |
} |
265 | 265 |
|
266 | 266 |
mp3_xing_add_frame(mp3, pkt); |
267 |
+ |
|
268 |
+ if (mp3->xing_offset) { |
|
269 |
+ mp3->audio_size += pkt->size; |
|
270 |
+ mp3->audio_crc = av_crc(av_crc_get_table(AV_CRC_16_ANSI_LE), |
|
271 |
+ mp3->audio_crc, pkt->data, pkt->size); |
|
272 |
+ } |
|
267 | 273 |
} |
268 | 274 |
|
269 | 275 |
return ff_raw_write_packet(s, pkt); |
... | ... |
@@ -292,26 +352,58 @@ static int mp3_queue_flush(AVFormatContext *s) |
292 | 292 |
static void mp3_update_xing(AVFormatContext *s) |
293 | 293 |
{ |
294 | 294 |
MP3Context *mp3 = s->priv_data; |
295 |
- int i; |
|
295 |
+ AVReplayGain *rg; |
|
296 |
+ uint16_t tag_crc; |
|
297 |
+ uint8_t *toc; |
|
298 |
+ int i, rg_size; |
|
296 | 299 |
|
297 | 300 |
/* replace "Xing" identification string with "Info" for CBR files. */ |
298 |
- if (!mp3->has_variable_bitrate) { |
|
299 |
- avio_seek(s->pb, mp3->xing_offset, SEEK_SET); |
|
300 |
- ffio_wfourcc(s->pb, "Info"); |
|
301 |
- } |
|
302 |
- |
|
303 |
- avio_seek(s->pb, mp3->xing_offset + 8, SEEK_SET); |
|
304 |
- avio_wb32(s->pb, mp3->frames); |
|
305 |
- avio_wb32(s->pb, mp3->size); |
|
301 |
+ if (!mp3->has_variable_bitrate) |
|
302 |
+ AV_WL32(mp3->xing_frame + mp3->xing_offset, MKTAG('I', 'n', 'f', 'o')); |
|
306 | 303 |
|
307 |
- avio_w8(s->pb, 0); // first toc entry has to be zero. |
|
304 |
+ AV_WB32(mp3->xing_frame + mp3->xing_offset + 8, mp3->frames); |
|
305 |
+ AV_WB32(mp3->xing_frame + mp3->xing_offset + 12, mp3->size); |
|
308 | 306 |
|
307 |
+ toc = mp3->xing_frame + mp3->xing_offset + 16; |
|
308 |
+ toc[0] = 0; // first toc entry has to be zero. |
|
309 | 309 |
for (i = 1; i < XING_TOC_SIZE; ++i) { |
310 | 310 |
int j = i * mp3->pos / XING_TOC_SIZE; |
311 | 311 |
int seek_point = 256LL * mp3->bag[j] / mp3->size; |
312 |
- avio_w8(s->pb, FFMIN(seek_point, 255)); |
|
312 |
+ toc[i] = FFMIN(seek_point, 255); |
|
313 |
+ } |
|
314 |
+ |
|
315 |
+ /* write replaygain */ |
|
316 |
+ rg = (AVReplayGain*)av_stream_get_side_data(s->streams[0], AV_PKT_DATA_REPLAYGAIN, |
|
317 |
+ &rg_size); |
|
318 |
+ if (rg && rg_size >= sizeof(*rg)) { |
|
319 |
+ uint16_t val; |
|
320 |
+ |
|
321 |
+ AV_WB32(mp3->xing_frame + mp3->xing_offset + 131, |
|
322 |
+ av_rescale(rg->track_peak, 1 << 23, 100000)); |
|
323 |
+ |
|
324 |
+ if (rg->track_gain != INT32_MIN) { |
|
325 |
+ val = FFABS(rg->track_gain / 10000) & ((1 << 9) - 1); |
|
326 |
+ val |= (rg->track_gain < 0) << 9; |
|
327 |
+ val |= 1 << 13; |
|
328 |
+ AV_WB16(mp3->xing_frame + mp3->xing_offset + 135, val); |
|
329 |
+ } |
|
330 |
+ |
|
331 |
+ if (rg->album_gain != INT32_MIN) { |
|
332 |
+ val = FFABS(rg->album_gain / 10000) & ((1 << 9) - 1); |
|
333 |
+ val |= (rg->album_gain < 0) << 9; |
|
334 |
+ val |= 1 << 14; |
|
335 |
+ AV_WB16(mp3->xing_frame + mp3->xing_offset + 137, val); |
|
336 |
+ } |
|
313 | 337 |
} |
314 | 338 |
|
339 |
+ AV_WB32(mp3->xing_frame + mp3->xing_offset + XING_SIZE - 8, mp3->audio_size); |
|
340 |
+ AV_WB16(mp3->xing_frame + mp3->xing_offset + XING_SIZE - 4, mp3->audio_crc); |
|
341 |
+ |
|
342 |
+ tag_crc = av_crc(av_crc_get_table(AV_CRC_16_ANSI_LE), 0, mp3->xing_frame, 190); |
|
343 |
+ AV_WB16(mp3->xing_frame + mp3->xing_offset + XING_SIZE - 2, tag_crc); |
|
344 |
+ |
|
345 |
+ avio_seek(s->pb, mp3->xing_frame_offset, SEEK_SET); |
|
346 |
+ avio_write(s->pb, mp3->xing_frame, mp3->xing_frame_size); |
|
315 | 347 |
avio_seek(s->pb, 0, SEEK_END); |
316 | 348 |
} |
317 | 349 |
|
... | ... |
@@ -334,6 +426,8 @@ static int mp3_write_trailer(struct AVFormatContext *s) |
334 | 334 |
if (mp3->xing_offset) |
335 | 335 |
mp3_update_xing(s); |
336 | 336 |
|
337 |
+ av_freep(&mp3->xing_frame); |
|
338 |
+ |
|
337 | 339 |
return 0; |
338 | 340 |
} |
339 | 341 |
|