Browse code

Improve support for PGS subtitles.

The previous implementation assumed that a new picture would always
supersede the previous picture. Similarly, presentation segments
were assumed to pertain to the most-recently-read picture.

However, each presentation segment may refer to 0 or more pictures
by their ID. Picture IDs may repeat, and a repeated picture ID
indicates that the old picture for that ID is no longer needed
and may be discarded.

The new implementation allocates a buffer with one slot for each
possible picture ID (the picture ID is a 16-bit field) and
properly decodes presentation segments so that all relevant
pictures are output upon encountering a display segment.

Given that most PGS streams are unlikely to use more than a small
fraction of the available picture IDs, it would probably be better
to use a more memory-efficient data structure. I'm lazy though, so
I leave this to a more motivated individual.

I've tested the code with MKV files in VLC (a recent revision from
their git repo) and with HandBrake (a version that I hacked up to
use ffmpeg's PGS subtitle decoder).

Review-by: Hendrik Leppkes <h.leppkes@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>

David Mitchell authored on 2012/01/20 00:31:01
Showing 2 changed files
... ...
@@ -26,6 +26,7 @@ version next:
26 26
 - ffprobe -show_program_version, -show_library_versions, -show_versions options
27 27
 - rv34: frame-level multi-threading
28 28
 - optimized iMDCT transform on x86 using SSE for for mpegaudiodec
29
+- Improved PGS subtitle decoder
29 30
 
30 31
 
31 32
 version 0.9:
... ...
@@ -40,11 +40,16 @@ enum SegmentType {
40 40
     DISPLAY_SEGMENT      = 0x80,
41 41
 };
42 42
 
43
-typedef struct PGSSubPresentation {
43
+typedef struct PGSSubPictureReference {
44 44
     int x;
45 45
     int y;
46
-    int id_number;
47
-    int object_number;
46
+    int picture_id;
47
+} PGSSubPictureReference;
48
+
49
+typedef struct PGSSubPresentation {
50
+    int                    id_number;
51
+    int                    object_count;
52
+    PGSSubPictureReference *objects;
48 53
 } PGSSubPresentation;
49 54
 
50 55
 typedef struct PGSSubPicture {
... ...
@@ -58,22 +63,29 @@ typedef struct PGSSubPicture {
58 58
 typedef struct PGSSubContext {
59 59
     PGSSubPresentation presentation;
60 60
     uint32_t           clut[256];
61
-    PGSSubPicture      picture;
61
+    PGSSubPicture      pictures[UINT16_MAX];
62 62
 } PGSSubContext;
63 63
 
64 64
 static av_cold int init_decoder(AVCodecContext *avctx)
65 65
 {
66
-    avctx->pix_fmt = PIX_FMT_PAL8;
66
+    avctx->pix_fmt     = PIX_FMT_PAL8;
67 67
 
68 68
     return 0;
69 69
 }
70 70
 
71 71
 static av_cold int close_decoder(AVCodecContext *avctx)
72 72
 {
73
+    uint16_t picture;
74
+
73 75
     PGSSubContext *ctx = avctx->priv_data;
74 76
 
75
-    av_freep(&ctx->picture.rle);
76
-    ctx->picture.rle_buffer_size  = 0;
77
+    av_freep(&ctx->presentation.objects);
78
+    ctx->presentation.object_count = 0;
79
+
80
+    for (picture = 0; picture < UINT16_MAX; ++picture) {
81
+        av_freep(&ctx->pictures[picture].rle);
82
+        ctx->pictures[picture].rle_buffer_size = 0;
83
+    }
77 84
 
78 85
     return 0;
79 86
 }
... ...
@@ -88,7 +100,7 @@ static av_cold int close_decoder(AVCodecContext *avctx)
88 88
  * @param buf pointer to the RLE data to process
89 89
  * @param buf_size size of the RLE data to process
90 90
  */
91
-static int decode_rle(AVCodecContext *avctx, AVSubtitle *sub,
91
+static int decode_rle(AVCodecContext *avctx, AVSubtitle *sub, int rect,
92 92
                       const uint8_t *buf, unsigned int buf_size)
93 93
 {
94 94
     const uint8_t *rle_bitmap_end;
... ...
@@ -96,15 +108,15 @@ static int decode_rle(AVCodecContext *avctx, AVSubtitle *sub,
96 96
 
97 97
     rle_bitmap_end = buf + buf_size;
98 98
 
99
-    sub->rects[0]->pict.data[0] = av_malloc(sub->rects[0]->w * sub->rects[0]->h);
99
+    sub->rects[rect]->pict.data[0] = av_malloc(sub->rects[rect]->w * sub->rects[rect]->h);
100 100
 
101
-    if (!sub->rects[0]->pict.data[0])
101
+    if (!sub->rects[rect]->pict.data[0])
102 102
         return -1;
103 103
 
104 104
     pixel_count = 0;
105 105
     line_count  = 0;
106 106
 
107
-    while (buf < rle_bitmap_end && line_count < sub->rects[0]->h) {
107
+    while (buf < rle_bitmap_end && line_count < sub->rects[rect]->h) {
108 108
         uint8_t flags, color;
109 109
         int run;
110 110
 
... ...
@@ -119,27 +131,27 @@ static int decode_rle(AVCodecContext *avctx, AVSubtitle *sub,
119 119
             color = flags & 0x80 ? bytestream_get_byte(&buf) : 0;
120 120
         }
121 121
 
122
-        if (run > 0 && pixel_count + run <= sub->rects[0]->w * sub->rects[0]->h) {
123
-            memset(sub->rects[0]->pict.data[0] + pixel_count, color, run);
122
+        if (run > 0 && pixel_count + run <= sub->rects[rect]->w * sub->rects[rect]->h) {
123
+            memset(sub->rects[rect]->pict.data[0] + pixel_count, color, run);
124 124
             pixel_count += run;
125 125
         } else if (!run) {
126 126
             /*
127 127
              * New Line. Check if correct pixels decoded, if not display warning
128 128
              * and adjust bitmap pointer to correct new line position.
129 129
              */
130
-            if (pixel_count % sub->rects[0]->w > 0)
130
+            if (pixel_count % sub->rects[rect]->w > 0)
131 131
                 av_log(avctx, AV_LOG_ERROR, "Decoded %d pixels, when line should be %d pixels\n",
132
-                       pixel_count % sub->rects[0]->w, sub->rects[0]->w);
132
+                       pixel_count % sub->rects[rect]->w, sub->rects[rect]->w);
133 133
             line_count++;
134 134
         }
135 135
     }
136 136
 
137
-    if (pixel_count < sub->rects[0]->w * sub->rects[0]->h) {
137
+    if (pixel_count < sub->rects[rect]->w * sub->rects[rect]->h) {
138 138
         av_log(avctx, AV_LOG_ERROR, "Insufficient RLE data for subtitle\n");
139 139
         return -1;
140 140
     }
141 141
 
142
-    av_dlog(avctx, "Pixel Count = %d, Area = %d\n", pixel_count, sub->rects[0]->w * sub->rects[0]->h);
142
+    av_dlog(avctx, "Pixel Count = %d, Area = %d\n", pixel_count, sub->rects[rect]->w * sub->rects[rect]->h);
143 143
 
144 144
     return 0;
145 145
 }
... ...
@@ -162,25 +174,28 @@ static int parse_picture_segment(AVCodecContext *avctx,
162 162
 
163 163
     uint8_t sequence_desc;
164 164
     unsigned int rle_bitmap_len, width, height;
165
+    uint16_t picture_id;
165 166
 
166 167
     if (buf_size <= 4)
167 168
         return -1;
168 169
     buf_size -= 4;
169 170
 
170
-    /* skip 3 unknown bytes: Object ID (2 bytes), Version Number */
171
-    buf += 3;
171
+    picture_id = bytestream_get_be16(&buf);
172
+
173
+    /* skip 1 unknown byte: Version Number */
174
+    buf++;
172 175
 
173 176
     /* Read the Sequence Description to determine if start of RLE data or appended to previous RLE */
174 177
     sequence_desc = bytestream_get_byte(&buf);
175 178
 
176 179
     if (!(sequence_desc & 0x80)) {
177 180
         /* Additional RLE data */
178
-        if (buf_size > ctx->picture.rle_remaining_len)
181
+        if (buf_size > ctx->pictures[picture_id].rle_remaining_len)
179 182
             return -1;
180 183
 
181
-        memcpy(ctx->picture.rle + ctx->picture.rle_data_len, buf, buf_size);
182
-        ctx->picture.rle_data_len += buf_size;
183
-        ctx->picture.rle_remaining_len -= buf_size;
184
+        memcpy(ctx->pictures[picture_id].rle + ctx->pictures[picture_id].rle_data_len, buf, buf_size);
185
+        ctx->pictures[picture_id].rle_data_len += buf_size;
186
+        ctx->pictures[picture_id].rle_remaining_len -= buf_size;
184 187
 
185 188
         return 0;
186 189
     }
... ...
@@ -202,17 +217,17 @@ static int parse_picture_segment(AVCodecContext *avctx,
202 202
         return -1;
203 203
     }
204 204
 
205
-    ctx->picture.w = width;
206
-    ctx->picture.h = height;
205
+    ctx->pictures[picture_id].w = width;
206
+    ctx->pictures[picture_id].h = height;
207 207
 
208
-    av_fast_malloc(&ctx->picture.rle, &ctx->picture.rle_buffer_size, rle_bitmap_len);
208
+    av_fast_malloc(&ctx->pictures[picture_id].rle, &ctx->pictures[picture_id].rle_buffer_size, rle_bitmap_len);
209 209
 
210
-    if (!ctx->picture.rle)
210
+    if (!ctx->pictures[picture_id].rle)
211 211
         return -1;
212 212
 
213
-    memcpy(ctx->picture.rle, buf, buf_size);
214
-    ctx->picture.rle_data_len = buf_size;
215
-    ctx->picture.rle_remaining_len = rle_bitmap_len - buf_size;
213
+    memcpy(ctx->pictures[picture_id].rle, buf, buf_size);
214
+    ctx->pictures[picture_id].rle_data_len      = buf_size;
215
+    ctx->pictures[picture_id].rle_remaining_len = rle_bitmap_len - buf_size;
216 216
 
217 217
     return 0;
218 218
 }
... ...
@@ -275,11 +290,11 @@ static void parse_presentation_segment(AVCodecContext *avctx,
275 275
 {
276 276
     PGSSubContext *ctx = avctx->priv_data;
277 277
 
278
-    int x, y;
279
-
280 278
     int w = bytestream_get_be16(&buf);
281 279
     int h = bytestream_get_be16(&buf);
282 280
 
281
+    uint16_t object_index;
282
+
283 283
     av_dlog(avctx, "Video Dimensions %dx%d\n",
284 284
             w, h);
285 285
     if (av_image_check_size(w, h, 0, avctx) >= 0)
... ...
@@ -298,34 +313,48 @@ static void parse_presentation_segment(AVCodecContext *avctx,
298 298
      */
299 299
     buf += 3;
300 300
 
301
-    ctx->presentation.object_number = bytestream_get_byte(&buf);
302
-    if (!ctx->presentation.object_number)
301
+    ctx->presentation.object_count = bytestream_get_byte(&buf);
302
+    if (!ctx->presentation.object_count)
303 303
         return;
304 304
 
305
-    /*
306
-     * Skip 4 bytes of unknown:
307
-     *     object_id_ref (2 bytes),
308
-     *     window_id_ref,
309
-     *     composition_flag (0x80 - object cropped, 0x40 - object forced)
310
-     */
311
-    buf += 4;
305
+    /* Verify that enough bytes are remaining for all of the objects. */
306
+    buf_size -= 11;
307
+    if (buf_size < ctx->presentation.object_count * 8) {
308
+        ctx->presentation.object_count = 0;
309
+        return;
310
+    }
312 311
 
313
-    x = bytestream_get_be16(&buf);
314
-    y = bytestream_get_be16(&buf);
312
+    av_freep(&ctx->presentation.objects);
313
+    ctx->presentation.objects = av_malloc(sizeof(PGSSubPictureReference) * ctx->presentation.object_count);
314
+    if (!ctx->presentation.objects) {
315
+        ctx->presentation.object_count = 0;
316
+        return;
317
+    }
315 318
 
316
-    /* TODO If cropping, cropping_x, cropping_y, cropping_width, cropping_height (all 2 bytes).*/
319
+    for (object_index = 0; object_index < ctx->presentation.object_count; ++object_index) {
320
+        PGSSubPictureReference *reference = &ctx->presentation.objects[object_index];
321
+        reference->picture_id             = bytestream_get_be16(&buf);
317 322
 
318
-    av_dlog(avctx, "Subtitle Placement x=%d, y=%d\n", x, y);
323
+         /*
324
+         * Skip 2 bytes of unknown:
325
+         *     window_id_ref,
326
+         *     composition_flag (0x80 - object cropped, 0x40 - object forced)
327
+         */
328
+        buf += 2;
319 329
 
320
-    if (x > avctx->width || y > avctx->height) {
321
-        av_log(avctx, AV_LOG_ERROR, "Subtitle out of video bounds. x = %d, y = %d, video width = %d, video height = %d.\n",
322
-               x, y, avctx->width, avctx->height);
323
-        x = 0; y = 0;
324
-    }
330
+        reference->x = bytestream_get_be16(&buf);
331
+        reference->y = bytestream_get_be16(&buf);
325 332
 
326
-    /* Fill in dimensions */
327
-    ctx->presentation.x = x;
328
-    ctx->presentation.y = y;
333
+        /* TODO If cropping, cropping_x, cropping_y, cropping_width, cropping_height (all 2 bytes).*/
334
+        av_dlog(avctx, "Subtitle Placement ID=%d, x=%d, y=%d\n", reference->picture_id, reference->x, reference->y);
335
+
336
+        if (reference->x > avctx->width || reference->y > avctx->height) {
337
+            av_log(avctx, AV_LOG_ERROR, "Subtitle out of video bounds. x = %d, y = %d, video width = %d, video height = %d.\n",
338
+                   reference->x, reference->y, avctx->width, avctx->height);
339
+            reference->x = 0;
340
+            reference->y = 0;
341
+        }
342
+    }
329 343
 }
330 344
 
331 345
 /**
... ...
@@ -349,45 +378,52 @@ static int display_end_segment(AVCodecContext *avctx, void *data,
349 349
     AVSubtitle    *sub = data;
350 350
     PGSSubContext *ctx = avctx->priv_data;
351 351
 
352
+    uint16_t rect;
353
+
352 354
     /*
353 355
      *      The end display time is a timeout value and is only reached
354
-     *      if the next subtitle is later then timeout or subtitle has
356
+     *      if the next subtitle is later than timeout or subtitle has
355 357
      *      not been cleared by a subsequent empty display command.
356 358
      */
357 359
 
358
-    // Blank if last object_number was 0.
359
-    // Note that this may be wrong for more complex subtitles.
360
-    if (!ctx->presentation.object_number)
360
+    memset(sub, 0, sizeof(*sub));
361
+
362
+    // Blank if last object_count was 0.
363
+    if (!ctx->presentation.object_count)
361 364
         return 1;
365
+
362 366
     sub->start_display_time = 0;
363 367
     sub->end_display_time   = 20000;
364 368
     sub->format             = 0;
365 369
 
366
-    sub->rects     = av_mallocz(sizeof(*sub->rects));
367
-    sub->rects[0]  = av_mallocz(sizeof(*sub->rects[0]));
368
-    sub->num_rects = 1;
369
-
370
-    sub->rects[0]->x    = ctx->presentation.x;
371
-    sub->rects[0]->y    = ctx->presentation.y;
372
-    sub->rects[0]->w    = ctx->picture.w;
373
-    sub->rects[0]->h    = ctx->picture.h;
374
-    sub->rects[0]->type = SUBTITLE_BITMAP;
375
-
376
-    /* Process bitmap */
377
-    sub->rects[0]->pict.linesize[0] = ctx->picture.w;
378
-
379
-    if (ctx->picture.rle) {
380
-        if (ctx->picture.rle_remaining_len)
381
-            av_log(avctx, AV_LOG_ERROR, "RLE data length %u is %u bytes shorter than expected\n",
382
-                   ctx->picture.rle_data_len, ctx->picture.rle_remaining_len);
383
-        if(decode_rle(avctx, sub, ctx->picture.rle, ctx->picture.rle_data_len) < 0)
384
-            return 0;
385
-    }
386
-    /* Allocate memory for colors */
387
-    sub->rects[0]->nb_colors    = 256;
388
-    sub->rects[0]->pict.data[1] = av_mallocz(AVPALETTE_SIZE);
370
+    sub->num_rects = ctx->presentation.object_count;
371
+    sub->rects     = av_mallocz(sizeof(*sub->rects) * sub->num_rects);
372
+
373
+    for (rect = 0; rect < sub->num_rects; ++rect) {
374
+        uint16_t picture_id    = ctx->presentation.objects[rect].picture_id;
375
+        sub->rects[rect]       = av_mallocz(sizeof(*sub->rects[rect]));
376
+        sub->rects[rect]->x    = ctx->presentation.objects[rect].x;
377
+        sub->rects[rect]->y    = ctx->presentation.objects[rect].y;
378
+        sub->rects[rect]->w    = ctx->pictures[picture_id].w;
379
+        sub->rects[rect]->h    = ctx->pictures[picture_id].h;
380
+        sub->rects[rect]->type = SUBTITLE_BITMAP;
381
+
382
+        /* Process bitmap */
383
+        sub->rects[rect]->pict.linesize[0] = ctx->pictures[picture_id].w;
384
+        if (ctx->pictures[picture_id].rle) {
385
+            if (ctx->pictures[picture_id].rle_remaining_len)
386
+                av_log(avctx, AV_LOG_ERROR, "RLE data length %u is %u bytes shorter than expected\n",
387
+                       ctx->pictures[picture_id].rle_data_len, ctx->pictures[picture_id].rle_remaining_len);
388
+            if (decode_rle(avctx, sub, rect, ctx->pictures[picture_id].rle, ctx->pictures[picture_id].rle_data_len) < 0)
389
+                return 0;
390
+        }
391
+
392
+        /* Allocate memory for colors */
393
+        sub->rects[rect]->nb_colors    = 256;
394
+        sub->rects[rect]->pict.data[1] = av_mallocz(AVPALETTE_SIZE);
389 395
 
390
-    memcpy(sub->rects[0]->pict.data[1], ctx->clut, sub->rects[0]->nb_colors * sizeof(uint32_t));
396
+        memcpy(sub->rects[rect]->pict.data[1], ctx->clut, sub->rects[rect]->nb_colors * sizeof(uint32_t));
397
+    }
391 398
 
392 399
     return 1;
393 400
 }