The previous implementation assumed that a new picture would always
supersede the previous picture. Similarly, presentation segments
were assumed to pertain to the most-recently-read picture.
However, each presentation segment may refer to 0 or more pictures
by their ID. Picture IDs may repeat, and a repeated picture ID
indicates that the old picture for that ID is no longer needed
and may be discarded.
The new implementation allocates a buffer with one slot for each
possible picture ID (the picture ID is a 16-bit field) and
properly decodes presentation segments so that all relevant
pictures are output upon encountering a display segment.
Given that most PGS streams are unlikely to use more than a small
fraction of the available picture IDs, it would probably be better
to use a more memory-efficient data structure. I'm lazy though, so
I leave this to a more motivated individual.
I've tested the code with MKV files in VLC (a recent revision from
their git repo) and with HandBrake (a version that I hacked up to
use ffmpeg's PGS subtitle decoder).
Review-by: Hendrik Leppkes <h.leppkes@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
... | ... |
@@ -26,6 +26,7 @@ version next: |
26 | 26 |
- ffprobe -show_program_version, -show_library_versions, -show_versions options |
27 | 27 |
- rv34: frame-level multi-threading |
28 | 28 |
- optimized iMDCT transform on x86 using SSE for for mpegaudiodec |
29 |
+- Improved PGS subtitle decoder |
|
29 | 30 |
|
30 | 31 |
|
31 | 32 |
version 0.9: |
... | ... |
@@ -40,11 +40,16 @@ enum SegmentType { |
40 | 40 |
DISPLAY_SEGMENT = 0x80, |
41 | 41 |
}; |
42 | 42 |
|
43 |
-typedef struct PGSSubPresentation { |
|
43 |
+typedef struct PGSSubPictureReference { |
|
44 | 44 |
int x; |
45 | 45 |
int y; |
46 |
- int id_number; |
|
47 |
- int object_number; |
|
46 |
+ int picture_id; |
|
47 |
+} PGSSubPictureReference; |
|
48 |
+ |
|
49 |
+typedef struct PGSSubPresentation { |
|
50 |
+ int id_number; |
|
51 |
+ int object_count; |
|
52 |
+ PGSSubPictureReference *objects; |
|
48 | 53 |
} PGSSubPresentation; |
49 | 54 |
|
50 | 55 |
typedef struct PGSSubPicture { |
... | ... |
@@ -58,22 +63,29 @@ typedef struct PGSSubPicture { |
58 | 58 |
typedef struct PGSSubContext { |
59 | 59 |
PGSSubPresentation presentation; |
60 | 60 |
uint32_t clut[256]; |
61 |
- PGSSubPicture picture; |
|
61 |
+ PGSSubPicture pictures[UINT16_MAX]; |
|
62 | 62 |
} PGSSubContext; |
63 | 63 |
|
64 | 64 |
static av_cold int init_decoder(AVCodecContext *avctx) |
65 | 65 |
{ |
66 |
- avctx->pix_fmt = PIX_FMT_PAL8; |
|
66 |
+ avctx->pix_fmt = PIX_FMT_PAL8; |
|
67 | 67 |
|
68 | 68 |
return 0; |
69 | 69 |
} |
70 | 70 |
|
71 | 71 |
static av_cold int close_decoder(AVCodecContext *avctx) |
72 | 72 |
{ |
73 |
+ uint16_t picture; |
|
74 |
+ |
|
73 | 75 |
PGSSubContext *ctx = avctx->priv_data; |
74 | 76 |
|
75 |
- av_freep(&ctx->picture.rle); |
|
76 |
- ctx->picture.rle_buffer_size = 0; |
|
77 |
+ av_freep(&ctx->presentation.objects); |
|
78 |
+ ctx->presentation.object_count = 0; |
|
79 |
+ |
|
80 |
+ for (picture = 0; picture < UINT16_MAX; ++picture) { |
|
81 |
+ av_freep(&ctx->pictures[picture].rle); |
|
82 |
+ ctx->pictures[picture].rle_buffer_size = 0; |
|
83 |
+ } |
|
77 | 84 |
|
78 | 85 |
return 0; |
79 | 86 |
} |
... | ... |
@@ -88,7 +100,7 @@ static av_cold int close_decoder(AVCodecContext *avctx) |
88 | 88 |
* @param buf pointer to the RLE data to process |
89 | 89 |
* @param buf_size size of the RLE data to process |
90 | 90 |
*/ |
91 |
-static int decode_rle(AVCodecContext *avctx, AVSubtitle *sub, |
|
91 |
+static int decode_rle(AVCodecContext *avctx, AVSubtitle *sub, int rect, |
|
92 | 92 |
const uint8_t *buf, unsigned int buf_size) |
93 | 93 |
{ |
94 | 94 |
const uint8_t *rle_bitmap_end; |
... | ... |
@@ -96,15 +108,15 @@ static int decode_rle(AVCodecContext *avctx, AVSubtitle *sub, |
96 | 96 |
|
97 | 97 |
rle_bitmap_end = buf + buf_size; |
98 | 98 |
|
99 |
- sub->rects[0]->pict.data[0] = av_malloc(sub->rects[0]->w * sub->rects[0]->h); |
|
99 |
+ sub->rects[rect]->pict.data[0] = av_malloc(sub->rects[rect]->w * sub->rects[rect]->h); |
|
100 | 100 |
|
101 |
- if (!sub->rects[0]->pict.data[0]) |
|
101 |
+ if (!sub->rects[rect]->pict.data[0]) |
|
102 | 102 |
return -1; |
103 | 103 |
|
104 | 104 |
pixel_count = 0; |
105 | 105 |
line_count = 0; |
106 | 106 |
|
107 |
- while (buf < rle_bitmap_end && line_count < sub->rects[0]->h) { |
|
107 |
+ while (buf < rle_bitmap_end && line_count < sub->rects[rect]->h) { |
|
108 | 108 |
uint8_t flags, color; |
109 | 109 |
int run; |
110 | 110 |
|
... | ... |
@@ -119,27 +131,27 @@ static int decode_rle(AVCodecContext *avctx, AVSubtitle *sub, |
119 | 119 |
color = flags & 0x80 ? bytestream_get_byte(&buf) : 0; |
120 | 120 |
} |
121 | 121 |
|
122 |
- if (run > 0 && pixel_count + run <= sub->rects[0]->w * sub->rects[0]->h) { |
|
123 |
- memset(sub->rects[0]->pict.data[0] + pixel_count, color, run); |
|
122 |
+ if (run > 0 && pixel_count + run <= sub->rects[rect]->w * sub->rects[rect]->h) { |
|
123 |
+ memset(sub->rects[rect]->pict.data[0] + pixel_count, color, run); |
|
124 | 124 |
pixel_count += run; |
125 | 125 |
} else if (!run) { |
126 | 126 |
/* |
127 | 127 |
* New Line. Check if correct pixels decoded, if not display warning |
128 | 128 |
* and adjust bitmap pointer to correct new line position. |
129 | 129 |
*/ |
130 |
- if (pixel_count % sub->rects[0]->w > 0) |
|
130 |
+ if (pixel_count % sub->rects[rect]->w > 0) |
|
131 | 131 |
av_log(avctx, AV_LOG_ERROR, "Decoded %d pixels, when line should be %d pixels\n", |
132 |
- pixel_count % sub->rects[0]->w, sub->rects[0]->w); |
|
132 |
+ pixel_count % sub->rects[rect]->w, sub->rects[rect]->w); |
|
133 | 133 |
line_count++; |
134 | 134 |
} |
135 | 135 |
} |
136 | 136 |
|
137 |
- if (pixel_count < sub->rects[0]->w * sub->rects[0]->h) { |
|
137 |
+ if (pixel_count < sub->rects[rect]->w * sub->rects[rect]->h) { |
|
138 | 138 |
av_log(avctx, AV_LOG_ERROR, "Insufficient RLE data for subtitle\n"); |
139 | 139 |
return -1; |
140 | 140 |
} |
141 | 141 |
|
142 |
- av_dlog(avctx, "Pixel Count = %d, Area = %d\n", pixel_count, sub->rects[0]->w * sub->rects[0]->h); |
|
142 |
+ av_dlog(avctx, "Pixel Count = %d, Area = %d\n", pixel_count, sub->rects[rect]->w * sub->rects[rect]->h); |
|
143 | 143 |
|
144 | 144 |
return 0; |
145 | 145 |
} |
... | ... |
@@ -162,25 +174,28 @@ static int parse_picture_segment(AVCodecContext *avctx, |
162 | 162 |
|
163 | 163 |
uint8_t sequence_desc; |
164 | 164 |
unsigned int rle_bitmap_len, width, height; |
165 |
+ uint16_t picture_id; |
|
165 | 166 |
|
166 | 167 |
if (buf_size <= 4) |
167 | 168 |
return -1; |
168 | 169 |
buf_size -= 4; |
169 | 170 |
|
170 |
- /* skip 3 unknown bytes: Object ID (2 bytes), Version Number */ |
|
171 |
- buf += 3; |
|
171 |
+ picture_id = bytestream_get_be16(&buf); |
|
172 |
+ |
|
173 |
+ /* skip 1 unknown byte: Version Number */ |
|
174 |
+ buf++; |
|
172 | 175 |
|
173 | 176 |
/* Read the Sequence Description to determine if start of RLE data or appended to previous RLE */ |
174 | 177 |
sequence_desc = bytestream_get_byte(&buf); |
175 | 178 |
|
176 | 179 |
if (!(sequence_desc & 0x80)) { |
177 | 180 |
/* Additional RLE data */ |
178 |
- if (buf_size > ctx->picture.rle_remaining_len) |
|
181 |
+ if (buf_size > ctx->pictures[picture_id].rle_remaining_len) |
|
179 | 182 |
return -1; |
180 | 183 |
|
181 |
- memcpy(ctx->picture.rle + ctx->picture.rle_data_len, buf, buf_size); |
|
182 |
- ctx->picture.rle_data_len += buf_size; |
|
183 |
- ctx->picture.rle_remaining_len -= buf_size; |
|
184 |
+ memcpy(ctx->pictures[picture_id].rle + ctx->pictures[picture_id].rle_data_len, buf, buf_size); |
|
185 |
+ ctx->pictures[picture_id].rle_data_len += buf_size; |
|
186 |
+ ctx->pictures[picture_id].rle_remaining_len -= buf_size; |
|
184 | 187 |
|
185 | 188 |
return 0; |
186 | 189 |
} |
... | ... |
@@ -202,17 +217,17 @@ static int parse_picture_segment(AVCodecContext *avctx, |
202 | 202 |
return -1; |
203 | 203 |
} |
204 | 204 |
|
205 |
- ctx->picture.w = width; |
|
206 |
- ctx->picture.h = height; |
|
205 |
+ ctx->pictures[picture_id].w = width; |
|
206 |
+ ctx->pictures[picture_id].h = height; |
|
207 | 207 |
|
208 |
- av_fast_malloc(&ctx->picture.rle, &ctx->picture.rle_buffer_size, rle_bitmap_len); |
|
208 |
+ av_fast_malloc(&ctx->pictures[picture_id].rle, &ctx->pictures[picture_id].rle_buffer_size, rle_bitmap_len); |
|
209 | 209 |
|
210 |
- if (!ctx->picture.rle) |
|
210 |
+ if (!ctx->pictures[picture_id].rle) |
|
211 | 211 |
return -1; |
212 | 212 |
|
213 |
- memcpy(ctx->picture.rle, buf, buf_size); |
|
214 |
- ctx->picture.rle_data_len = buf_size; |
|
215 |
- ctx->picture.rle_remaining_len = rle_bitmap_len - buf_size; |
|
213 |
+ memcpy(ctx->pictures[picture_id].rle, buf, buf_size); |
|
214 |
+ ctx->pictures[picture_id].rle_data_len = buf_size; |
|
215 |
+ ctx->pictures[picture_id].rle_remaining_len = rle_bitmap_len - buf_size; |
|
216 | 216 |
|
217 | 217 |
return 0; |
218 | 218 |
} |
... | ... |
@@ -275,11 +290,11 @@ static void parse_presentation_segment(AVCodecContext *avctx, |
275 | 275 |
{ |
276 | 276 |
PGSSubContext *ctx = avctx->priv_data; |
277 | 277 |
|
278 |
- int x, y; |
|
279 |
- |
|
280 | 278 |
int w = bytestream_get_be16(&buf); |
281 | 279 |
int h = bytestream_get_be16(&buf); |
282 | 280 |
|
281 |
+ uint16_t object_index; |
|
282 |
+ |
|
283 | 283 |
av_dlog(avctx, "Video Dimensions %dx%d\n", |
284 | 284 |
w, h); |
285 | 285 |
if (av_image_check_size(w, h, 0, avctx) >= 0) |
... | ... |
@@ -298,34 +313,48 @@ static void parse_presentation_segment(AVCodecContext *avctx, |
298 | 298 |
*/ |
299 | 299 |
buf += 3; |
300 | 300 |
|
301 |
- ctx->presentation.object_number = bytestream_get_byte(&buf); |
|
302 |
- if (!ctx->presentation.object_number) |
|
301 |
+ ctx->presentation.object_count = bytestream_get_byte(&buf); |
|
302 |
+ if (!ctx->presentation.object_count) |
|
303 | 303 |
return; |
304 | 304 |
|
305 |
- /* |
|
306 |
- * Skip 4 bytes of unknown: |
|
307 |
- * object_id_ref (2 bytes), |
|
308 |
- * window_id_ref, |
|
309 |
- * composition_flag (0x80 - object cropped, 0x40 - object forced) |
|
310 |
- */ |
|
311 |
- buf += 4; |
|
305 |
+ /* Verify that enough bytes are remaining for all of the objects. */ |
|
306 |
+ buf_size -= 11; |
|
307 |
+ if (buf_size < ctx->presentation.object_count * 8) { |
|
308 |
+ ctx->presentation.object_count = 0; |
|
309 |
+ return; |
|
310 |
+ } |
|
312 | 311 |
|
313 |
- x = bytestream_get_be16(&buf); |
|
314 |
- y = bytestream_get_be16(&buf); |
|
312 |
+ av_freep(&ctx->presentation.objects); |
|
313 |
+ ctx->presentation.objects = av_malloc(sizeof(PGSSubPictureReference) * ctx->presentation.object_count); |
|
314 |
+ if (!ctx->presentation.objects) { |
|
315 |
+ ctx->presentation.object_count = 0; |
|
316 |
+ return; |
|
317 |
+ } |
|
315 | 318 |
|
316 |
- /* TODO If cropping, cropping_x, cropping_y, cropping_width, cropping_height (all 2 bytes).*/ |
|
319 |
+ for (object_index = 0; object_index < ctx->presentation.object_count; ++object_index) { |
|
320 |
+ PGSSubPictureReference *reference = &ctx->presentation.objects[object_index]; |
|
321 |
+ reference->picture_id = bytestream_get_be16(&buf); |
|
317 | 322 |
|
318 |
- av_dlog(avctx, "Subtitle Placement x=%d, y=%d\n", x, y); |
|
323 |
+ /* |
|
324 |
+ * Skip 2 bytes of unknown: |
|
325 |
+ * window_id_ref, |
|
326 |
+ * composition_flag (0x80 - object cropped, 0x40 - object forced) |
|
327 |
+ */ |
|
328 |
+ buf += 2; |
|
319 | 329 |
|
320 |
- if (x > avctx->width || y > avctx->height) { |
|
321 |
- av_log(avctx, AV_LOG_ERROR, "Subtitle out of video bounds. x = %d, y = %d, video width = %d, video height = %d.\n", |
|
322 |
- x, y, avctx->width, avctx->height); |
|
323 |
- x = 0; y = 0; |
|
324 |
- } |
|
330 |
+ reference->x = bytestream_get_be16(&buf); |
|
331 |
+ reference->y = bytestream_get_be16(&buf); |
|
325 | 332 |
|
326 |
- /* Fill in dimensions */ |
|
327 |
- ctx->presentation.x = x; |
|
328 |
- ctx->presentation.y = y; |
|
333 |
+ /* TODO If cropping, cropping_x, cropping_y, cropping_width, cropping_height (all 2 bytes).*/ |
|
334 |
+ av_dlog(avctx, "Subtitle Placement ID=%d, x=%d, y=%d\n", reference->picture_id, reference->x, reference->y); |
|
335 |
+ |
|
336 |
+ if (reference->x > avctx->width || reference->y > avctx->height) { |
|
337 |
+ av_log(avctx, AV_LOG_ERROR, "Subtitle out of video bounds. x = %d, y = %d, video width = %d, video height = %d.\n", |
|
338 |
+ reference->x, reference->y, avctx->width, avctx->height); |
|
339 |
+ reference->x = 0; |
|
340 |
+ reference->y = 0; |
|
341 |
+ } |
|
342 |
+ } |
|
329 | 343 |
} |
330 | 344 |
|
331 | 345 |
/** |
... | ... |
@@ -349,45 +378,52 @@ static int display_end_segment(AVCodecContext *avctx, void *data, |
349 | 349 |
AVSubtitle *sub = data; |
350 | 350 |
PGSSubContext *ctx = avctx->priv_data; |
351 | 351 |
|
352 |
+ uint16_t rect; |
|
353 |
+ |
|
352 | 354 |
/* |
353 | 355 |
* The end display time is a timeout value and is only reached |
354 |
- * if the next subtitle is later then timeout or subtitle has |
|
356 |
+ * if the next subtitle is later than timeout or subtitle has |
|
355 | 357 |
* not been cleared by a subsequent empty display command. |
356 | 358 |
*/ |
357 | 359 |
|
358 |
- // Blank if last object_number was 0. |
|
359 |
- // Note that this may be wrong for more complex subtitles. |
|
360 |
- if (!ctx->presentation.object_number) |
|
360 |
+ memset(sub, 0, sizeof(*sub)); |
|
361 |
+ |
|
362 |
+ // Blank if last object_count was 0. |
|
363 |
+ if (!ctx->presentation.object_count) |
|
361 | 364 |
return 1; |
365 |
+ |
|
362 | 366 |
sub->start_display_time = 0; |
363 | 367 |
sub->end_display_time = 20000; |
364 | 368 |
sub->format = 0; |
365 | 369 |
|
366 |
- sub->rects = av_mallocz(sizeof(*sub->rects)); |
|
367 |
- sub->rects[0] = av_mallocz(sizeof(*sub->rects[0])); |
|
368 |
- sub->num_rects = 1; |
|
369 |
- |
|
370 |
- sub->rects[0]->x = ctx->presentation.x; |
|
371 |
- sub->rects[0]->y = ctx->presentation.y; |
|
372 |
- sub->rects[0]->w = ctx->picture.w; |
|
373 |
- sub->rects[0]->h = ctx->picture.h; |
|
374 |
- sub->rects[0]->type = SUBTITLE_BITMAP; |
|
375 |
- |
|
376 |
- /* Process bitmap */ |
|
377 |
- sub->rects[0]->pict.linesize[0] = ctx->picture.w; |
|
378 |
- |
|
379 |
- if (ctx->picture.rle) { |
|
380 |
- if (ctx->picture.rle_remaining_len) |
|
381 |
- av_log(avctx, AV_LOG_ERROR, "RLE data length %u is %u bytes shorter than expected\n", |
|
382 |
- ctx->picture.rle_data_len, ctx->picture.rle_remaining_len); |
|
383 |
- if(decode_rle(avctx, sub, ctx->picture.rle, ctx->picture.rle_data_len) < 0) |
|
384 |
- return 0; |
|
385 |
- } |
|
386 |
- /* Allocate memory for colors */ |
|
387 |
- sub->rects[0]->nb_colors = 256; |
|
388 |
- sub->rects[0]->pict.data[1] = av_mallocz(AVPALETTE_SIZE); |
|
370 |
+ sub->num_rects = ctx->presentation.object_count; |
|
371 |
+ sub->rects = av_mallocz(sizeof(*sub->rects) * sub->num_rects); |
|
372 |
+ |
|
373 |
+ for (rect = 0; rect < sub->num_rects; ++rect) { |
|
374 |
+ uint16_t picture_id = ctx->presentation.objects[rect].picture_id; |
|
375 |
+ sub->rects[rect] = av_mallocz(sizeof(*sub->rects[rect])); |
|
376 |
+ sub->rects[rect]->x = ctx->presentation.objects[rect].x; |
|
377 |
+ sub->rects[rect]->y = ctx->presentation.objects[rect].y; |
|
378 |
+ sub->rects[rect]->w = ctx->pictures[picture_id].w; |
|
379 |
+ sub->rects[rect]->h = ctx->pictures[picture_id].h; |
|
380 |
+ sub->rects[rect]->type = SUBTITLE_BITMAP; |
|
381 |
+ |
|
382 |
+ /* Process bitmap */ |
|
383 |
+ sub->rects[rect]->pict.linesize[0] = ctx->pictures[picture_id].w; |
|
384 |
+ if (ctx->pictures[picture_id].rle) { |
|
385 |
+ if (ctx->pictures[picture_id].rle_remaining_len) |
|
386 |
+ av_log(avctx, AV_LOG_ERROR, "RLE data length %u is %u bytes shorter than expected\n", |
|
387 |
+ ctx->pictures[picture_id].rle_data_len, ctx->pictures[picture_id].rle_remaining_len); |
|
388 |
+ if (decode_rle(avctx, sub, rect, ctx->pictures[picture_id].rle, ctx->pictures[picture_id].rle_data_len) < 0) |
|
389 |
+ return 0; |
|
390 |
+ } |
|
391 |
+ |
|
392 |
+ /* Allocate memory for colors */ |
|
393 |
+ sub->rects[rect]->nb_colors = 256; |
|
394 |
+ sub->rects[rect]->pict.data[1] = av_mallocz(AVPALETTE_SIZE); |
|
389 | 395 |
|
390 |
- memcpy(sub->rects[0]->pict.data[1], ctx->clut, sub->rects[0]->nb_colors * sizeof(uint32_t)); |
|
396 |
+ memcpy(sub->rects[rect]->pict.data[1], ctx->clut, sub->rects[rect]->nb_colors * sizeof(uint32_t)); |
|
397 |
+ } |
|
391 | 398 |
|
392 | 399 |
return 1; |
393 | 400 |
} |