This should fix trac tickets #1163, #2502.
Stefano Sabatini authored on 2013/10/02 23:22:17... | ... |
@@ -337,6 +337,39 @@ A writer may accept one or more arguments, which specify the options |
337 | 337 |
to adopt. The options are specified as a list of @var{key}=@var{value} |
338 | 338 |
pairs, separated by ":". |
339 | 339 |
|
340 |
+All writers support the following options: |
|
341 |
+ |
|
342 |
+@table @option |
|
343 |
+@item string_validation, sv |
|
344 |
+Set string validation mode. |
|
345 |
+ |
|
346 |
+The following values are accepted. |
|
347 |
+@table @samp |
|
348 |
+@item fail |
|
349 |
+The writer will fail immediately in case an invalid UTF-8 |
|
350 |
+sequence or code point is found in the input. This is especially |
|
351 |
+useful to validate input metadata. |
|
352 |
+ |
|
353 |
+@item ignore |
|
354 |
+Any validation error will be ignored. This will result in possibly |
|
355 |
+broken output, especially with the json or xml writer. |
|
356 |
+ |
|
357 |
+@item replace |
|
358 |
+The writer will substitute invalid UTF-8 sequences or code points with |
|
359 |
+the string specified with the @option{string_validation_replacement} option. |
|
360 |
+@end table |
|
361 |
+ |
|
362 |
+Default value is @samp{replace}. |
|
363 |
+ |
|
364 |
+@item string_validation_replacement, svr |
|
365 |
+Set replacement string to use in case @option{string_validation} is |
|
366 |
+set to @samp{replace}. |
|
367 |
+ |
|
368 |
+In case the option is not specified, the writer will assume the empty |
|
369 |
+string, that is, it will remove the invalid sequences from the input |
|
370 |
+strings. |
|
371 |
+@end table |
|
372 |
+ |
|
340 | 373 |
A description of the currently available writers follows. |
341 | 374 |
|
342 | 375 |
@section default |
... | ... |
@@ -258,6 +258,13 @@ typedef struct WriterContext WriterContext; |
258 | 258 |
#define WRITER_FLAG_DISPLAY_OPTIONAL_FIELDS 1 |
259 | 259 |
#define WRITER_FLAG_PUT_PACKETS_AND_FRAMES_IN_SAME_CHAPTER 2 |
260 | 260 |
|
261 |
/**
 * Policy a writer applies when a string it is asked to print contains an
 * invalid UTF-8 sequence. The values are exposed through the
 * "string_validation" writer option.
 */
typedef enum {
    WRITER_STRING_VALIDATION_FAIL    = 0, ///< abort with an error ("fail")
    WRITER_STRING_VALIDATION_REPLACE = 1, ///< substitute the replacement string ("replace")
    WRITER_STRING_VALIDATION_IGNORE  = 2, ///< copy the invalid bytes through unchanged ("ignore")
    WRITER_STRING_VALIDATION_NB      = 3, ///< number of modes, not a mode itself
} StringValidation;
|
267 |
+ |
|
261 | 268 |
typedef struct Writer { |
262 | 269 |
const AVClass *priv_class; ///< private class of the writer, if any |
263 | 270 |
int priv_size; ///< private size for the writer context |
... | ... |
@@ -298,6 +305,10 @@ struct WriterContext { |
298 | 298 |
unsigned int nb_section_packet; ///< number of the packet section in case we are in "packets_and_frames" section |
299 | 299 |
unsigned int nb_section_frame; ///< number of the frame section in case we are in "packets_and_frames" section |
300 | 300 |
unsigned int nb_section_packet_frame; ///< nb_section_packet or nb_section_frame according if is_packets_and_frames |
301 |
+ |
|
302 |
+ StringValidation string_validation; |
|
303 |
+ char *string_validation_replacement; |
|
304 |
+ unsigned int string_validation_utf8_flags; |
|
301 | 305 |
}; |
302 | 306 |
|
303 | 307 |
static const char *writer_get_name(void *p) |
... | ... |
@@ -308,6 +319,19 @@ static const char *writer_get_name(void *p) |
308 | 308 |
|
309 | 309 |
#define OFFSET(x) offsetof(WriterContext, x) |
310 | 310 |
|
311 |
+static const AVOption writer_options[] = { |
|
312 |
+ { "string_validation", "set string validation mode", |
|
313 |
+ OFFSET(string_validation), AV_OPT_TYPE_INT, {.i64=WRITER_STRING_VALIDATION_REPLACE}, 0, WRITER_STRING_VALIDATION_NB-1, .unit = "sv" }, |
|
314 |
+ { "sv", "set string validation mode", |
|
315 |
+ OFFSET(string_validation), AV_OPT_TYPE_INT, {.i64=WRITER_STRING_VALIDATION_REPLACE}, 0, WRITER_STRING_VALIDATION_NB-1, .unit = "sv" }, |
|
316 |
+ { "ignore", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = WRITER_STRING_VALIDATION_IGNORE}, .unit = "sv" }, |
|
317 |
+ { "replace", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = WRITER_STRING_VALIDATION_REPLACE}, .unit = "sv" }, |
|
318 |
+ { "fail", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = WRITER_STRING_VALIDATION_FAIL}, .unit = "sv" }, |
|
319 |
+ { "string_validation_replacement", "set string validation replacement string", OFFSET(string_validation_replacement), AV_OPT_TYPE_STRING, {.str=""}}, |
|
320 |
+ { "svr", "set string validation replacement string", OFFSET(string_validation_replacement), AV_OPT_TYPE_STRING, {.str=""}}, |
|
321 |
+ { NULL }, |
|
322 |
+}; |
|
323 |
+ |
|
311 | 324 |
static void *writer_child_next(void *obj, void *prev) |
312 | 325 |
{ |
313 | 326 |
WriterContext *ctx = obj; |
... | ... |
@@ -321,6 +345,7 @@ static const AVClass writer_class = { |
321 | 321 |
writer_get_name, |
322 | 322 |
NULL, |
323 | 323 |
LIBAVUTIL_VERSION_INT, |
324 |
+ .option = writer_options, |
|
324 | 325 |
.child_next = writer_child_next, |
325 | 326 |
}; |
326 | 327 |
|
... | ... |
@@ -341,6 +366,15 @@ static void writer_close(WriterContext **wctx) |
341 | 341 |
av_freep(wctx); |
342 | 342 |
} |
343 | 343 |
|
344 |
+static void bprint_bytes(AVBPrint *bp, const uint8_t *ubuf, size_t ubuf_size) |
|
345 |
+{ |
|
346 |
+ int i; |
|
347 |
+ av_bprintf(bp, "0X"); |
|
348 |
+ for (i = 0; i < ubuf_size; i++) |
|
349 |
+ av_bprintf(bp, "%02X", ubuf[i]); |
|
350 |
+} |
|
351 |
+ |
|
352 |
+ |
|
344 | 353 |
static int writer_open(WriterContext **wctx, const Writer *writer, const char *args, |
345 | 354 |
const struct section *sections, int nb_sections) |
346 | 355 |
{ |
... | ... |
@@ -393,6 +427,26 @@ static int writer_open(WriterContext **wctx, const Writer *writer, const char *a |
393 | 393 |
av_dict_free(&opts); |
394 | 394 |
} |
395 | 395 |
|
396 |
+ /* validate replace string */ |
|
397 |
+ { |
|
398 |
+ const uint8_t *p = (*wctx)->string_validation_replacement; |
|
399 |
+ const uint8_t *endp = p + strlen(p); |
|
400 |
+ while (*p) { |
|
401 |
+ const uint8_t *p0 = p; |
|
402 |
+ int32_t code; |
|
403 |
+ ret = av_utf8_decode(&code, &p, endp, (*wctx)->string_validation_utf8_flags); |
|
404 |
+ if (ret < 0) { |
|
405 |
+ AVBPrint bp; |
|
406 |
+ av_bprint_init(&bp, 0, AV_BPRINT_SIZE_AUTOMATIC); |
|
407 |
+ bprint_bytes(&bp, p0, p-p0), |
|
408 |
+ av_log(wctx, AV_LOG_ERROR, |
|
409 |
+ "Invalid UTF8 sequence %s found in string validation replace '%s'\n", |
|
410 |
+ bp.str, (*wctx)->string_validation_replacement); |
|
411 |
+ return ret; |
|
412 |
+ } |
|
413 |
+ } |
|
414 |
+ } |
|
415 |
+ |
|
396 | 416 |
for (i = 0; i < SECTION_MAX_NB_LEVELS; i++) |
397 | 417 |
av_bprint_init(&(*wctx)->section_pbuf[i], 1, AV_BPRINT_SIZE_UNLIMITED); |
398 | 418 |
|
... | ... |
@@ -460,17 +514,94 @@ static inline void writer_print_integer(WriterContext *wctx, |
460 | 460 |
} |
461 | 461 |
} |
462 | 462 |
|
463 |
+static inline int validate_string(WriterContext *wctx, char **dstp, const char *src) |
|
464 |
+{ |
|
465 |
+ const uint8_t *p, *endp; |
|
466 |
+ AVBPrint dstbuf; |
|
467 |
+ int invalid_chars_nb = 0, ret = 0; |
|
468 |
+ |
|
469 |
+ av_bprint_init(&dstbuf, 0, AV_BPRINT_SIZE_UNLIMITED); |
|
470 |
+ |
|
471 |
+ endp = src + strlen(src); |
|
472 |
+ for (p = (uint8_t *)src; *p;) { |
|
473 |
+ uint32_t code; |
|
474 |
+ int invalid = 0; |
|
475 |
+ const uint8_t *p0 = p; |
|
476 |
+ |
|
477 |
+ if (av_utf8_decode(&code, &p, endp, wctx->string_validation_utf8_flags) < 0) { |
|
478 |
+ AVBPrint bp; |
|
479 |
+ av_bprint_init(&bp, 0, AV_BPRINT_SIZE_AUTOMATIC); |
|
480 |
+ bprint_bytes(&bp, p0, p-p0); |
|
481 |
+ av_log(wctx, AV_LOG_DEBUG, |
|
482 |
+ "Invalid UTF-8 sequence %s found in string '%s'\n", bp.str, src); |
|
483 |
+ invalid = 1; |
|
484 |
+ } |
|
485 |
+ |
|
486 |
+ if (invalid) { |
|
487 |
+ invalid_chars_nb++; |
|
488 |
+ |
|
489 |
+ switch (wctx->string_validation) { |
|
490 |
+ case WRITER_STRING_VALIDATION_FAIL: |
|
491 |
+ av_log(wctx, AV_LOG_ERROR, |
|
492 |
+ "Invalid UTF-8 sequence found in string '%s'\n", src); |
|
493 |
+ ret = AVERROR_INVALIDDATA; |
|
494 |
+ goto end; |
|
495 |
+ break; |
|
496 |
+ |
|
497 |
+ case WRITER_STRING_VALIDATION_REPLACE: |
|
498 |
+ av_bprintf(&dstbuf, "%s", wctx->string_validation_replacement); |
|
499 |
+ break; |
|
500 |
+ } |
|
501 |
+ } |
|
502 |
+ |
|
503 |
+ if (!invalid || wctx->string_validation == WRITER_STRING_VALIDATION_IGNORE) |
|
504 |
+ av_bprint_append_data(&dstbuf, p0, p-p0); |
|
505 |
+ } |
|
506 |
+ |
|
507 |
+ if (invalid_chars_nb && wctx->string_validation == WRITER_STRING_VALIDATION_REPLACE) { |
|
508 |
+ av_log(wctx, AV_LOG_WARNING, |
|
509 |
+ "%d invalid UTF-8 sequence(s) found in string '%s', replaced with '%s'\n", |
|
510 |
+ invalid_chars_nb, src, wctx->string_validation_replacement); |
|
511 |
+ } |
|
512 |
+ |
|
513 |
+end: |
|
514 |
+ av_bprint_finalize(&dstbuf, dstp); |
|
515 |
+ return ret; |
|
516 |
+} |
|
517 |
+ |
|
518 |
+#define PRINT_STRING_OPT 1 |
|
519 |
+#define PRINT_STRING_VALIDATE 2 |
|
520 |
+ |
|
463 | 521 |
static inline int writer_print_string(WriterContext *wctx, |
464 |
- const char *key, const char *val, int opt) |
|
522 |
+ const char *key, const char *val, int flags) |
|
465 | 523 |
{ |
466 | 524 |
const struct section *section = wctx->section[wctx->level]; |
467 | 525 |
int ret = 0; |
468 | 526 |
|
469 |
- if (opt && !(wctx->writer->flags & WRITER_FLAG_DISPLAY_OPTIONAL_FIELDS)) |
|
527 |
+ if ((flags & PRINT_STRING_OPT) |
|
528 |
+ && !(wctx->writer->flags & WRITER_FLAG_DISPLAY_OPTIONAL_FIELDS)) |
|
470 | 529 |
return 0; |
471 | 530 |
|
472 | 531 |
if (section->show_all_entries || av_dict_get(section->entries_to_show, key, NULL, 0)) { |
473 |
- wctx->writer->print_string(wctx, key, val); |
|
532 |
+ if (flags & PRINT_STRING_VALIDATE) { |
|
533 |
+ char *key1 = NULL, *val1 = NULL; |
|
534 |
+ ret = validate_string(wctx, &key1, key); |
|
535 |
+ if (ret < 0) goto end; |
|
536 |
+ ret = validate_string(wctx, &val1, val); |
|
537 |
+ if (ret < 0) goto end; |
|
538 |
+ wctx->writer->print_string(wctx, key1, val1); |
|
539 |
+ end: |
|
540 |
+ if (ret < 0) { |
|
541 |
+ av_log(wctx, AV_LOG_ERROR, |
|
542 |
+ "Invalid key=value string combination %s=%s in section %s\n", |
|
543 |
+ key, val, section->unique_name); |
|
544 |
+ } |
|
545 |
+ av_free(key1); |
|
546 |
+ av_free(val1); |
|
547 |
+ } else { |
|
548 |
+ wctx->writer->print_string(wctx, key, val); |
|
549 |
+ } |
|
550 |
+ |
|
474 | 551 |
wctx->nb_item[wctx->level]++; |
475 | 552 |
} |
476 | 553 |
|
... | ... |
@@ -492,7 +623,7 @@ static void writer_print_time(WriterContext *wctx, const char *key, |
492 | 492 |
char buf[128]; |
493 | 493 |
|
494 | 494 |
if ((!is_duration && ts == AV_NOPTS_VALUE) || (is_duration && ts == 0)) { |
495 |
- writer_print_string(wctx, key, "N/A", 1); |
|
495 |
+ writer_print_string(wctx, key, "N/A", PRINT_STRING_OPT); |
|
496 | 496 |
} else { |
497 | 497 |
double d = ts * av_q2d(*time_base); |
498 | 498 |
struct unit_value uv; |
... | ... |
@@ -506,7 +637,7 @@ static void writer_print_time(WriterContext *wctx, const char *key, |
506 | 506 |
/**
 * Print a timestamp or duration as an integer entry.
 *
 * A timestamp equal to AV_NOPTS_VALUE, or a duration equal to 0, is printed
 * as the optional string "N/A" instead.
 *
 * @param wctx        writer context
 * @param key         name of the entry
 * @param ts          value to print
 * @param is_duration non-zero if ts is a duration rather than a timestamp
 */
static void writer_print_ts(WriterContext *wctx, const char *key, int64_t ts, int is_duration)
{
    const int64_t unset_value = is_duration ? 0 : AV_NOPTS_VALUE;

    if (ts == unset_value) {
        writer_print_string(wctx, key, "N/A", PRINT_STRING_OPT);
        return;
    }

    writer_print_integer(wctx, key, ts);
}
... | ... |
@@ -1476,7 +1607,8 @@ static void writer_register_all(void) |
1476 | 1476 |
#define print_int(k, v) writer_print_integer(w, k, v) |
1477 | 1477 |
#define print_q(k, v, s) writer_print_rational(w, k, v, s) |
1478 | 1478 |
#define print_str(k, v) writer_print_string(w, k, v, 0) |
1479 |
-#define print_str_opt(k, v) writer_print_string(w, k, v, 1) |
|
1479 |
+#define print_str_opt(k, v) writer_print_string(w, k, v, PRINT_STRING_OPT) |
|
1480 |
+#define print_str_validate(k, v) writer_print_string(w, k, v, PRINT_STRING_VALIDATE) |
|
1480 | 1481 |
#define print_time(k, v, tb) writer_print_time(w, k, v, tb, 0) |
1481 | 1482 |
#define print_ts(k, v) writer_print_ts(w, k, v, 0) |
1482 | 1483 |
#define print_duration_time(k, v, tb) writer_print_time(w, k, v, tb, 1) |
... | ... |
@@ -1491,21 +1623,20 @@ static void writer_register_all(void) |
1491 | 1491 |
#define print_section_header(s) writer_print_section_header(w, s) |
1492 | 1492 |
#define print_section_footer(s) writer_print_section_footer(w, s) |
1493 | 1493 |
|
1494 |
-static inline int show_tags(WriterContext *wctx, AVDictionary *tags, int section_id) |
|
1494 |
+static inline int show_tags(WriterContext *w, AVDictionary *tags, int section_id) |
|
1495 | 1495 |
{ |
1496 | 1496 |
AVDictionaryEntry *tag = NULL; |
1497 | 1497 |
int ret = 0; |
1498 | 1498 |
|
1499 | 1499 |
if (!tags) |
1500 | 1500 |
return 0; |
1501 |
- writer_print_section_header(wctx, section_id); |
|
1501 |
+ writer_print_section_header(w, section_id); |
|
1502 | 1502 |
|
1503 | 1503 |
while ((tag = av_dict_get(tags, "", tag, AV_DICT_IGNORE_SUFFIX))) { |
1504 |
- ret = writer_print_string(wctx, tag->key, tag->value, 0); |
|
1505 |
- if (ret < 0) |
|
1504 |
+ if ((ret = print_str_validate(tag->key, tag->value)) < 0) |
|
1506 | 1505 |
break; |
1507 | 1506 |
} |
1508 |
- writer_print_section_footer(wctx); |
|
1507 |
+ writer_print_section_footer(w); |
|
1509 | 1508 |
|
1510 | 1509 |
return ret; |
1511 | 1510 |
} |
... | ... |
@@ -2054,7 +2185,7 @@ static int show_format(WriterContext *w, AVFormatContext *fmt_ctx) |
2054 | 2054 |
int ret = 0; |
2055 | 2055 |
|
2056 | 2056 |
writer_print_section_header(w, SECTION_ID_FORMAT); |
2057 |
- print_str("filename", fmt_ctx->filename); |
|
2057 |
+ print_str_validate("filename", fmt_ctx->filename); |
|
2058 | 2058 |
print_int("nb_streams", fmt_ctx->nb_streams); |
2059 | 2059 |
print_int("nb_programs", fmt_ctx->nb_programs); |
2060 | 2060 |
print_str("format_name", fmt_ctx->iformat->name); |
... | ... |
@@ -2755,6 +2886,9 @@ int main(int argc, char **argv) |
2755 | 2755 |
|
2756 | 2756 |
if ((ret = writer_open(&wctx, w, w_args, |
2757 | 2757 |
sections, FF_ARRAY_ELEMS(sections))) >= 0) { |
2758 |
+ if (w == &xml_writer) |
|
2759 |
+ wctx->string_validation_utf8_flags |= AV_UTF8_FLAG_EXCLUDE_XML_INVALID_CONTROL_CODES; |
|
2760 |
+ |
|
2758 | 2761 |
writer_print_section_header(wctx, SECTION_ID_ROOT); |
2759 | 2762 |
|
2760 | 2763 |
if (do_show_program_version) |