Browse code

ffv1dec: Support frame threading with GOP > 1

This is about 20-30% faster than slice threading.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>

Michael Niedermayer authored on 2013/04/28 05:24:14
Showing 3 changed files
... ...
@@ -91,6 +91,7 @@ typedef struct FFV1Context {
91 91
     int flags;
92 92
     int picture_number;
93 93
     ThreadFrame picture, last_picture;
94
+    struct FFV1Context *fsrc;
94 95
 
95 96
     AVFrame *cur;
96 97
     int plane_count;
... ...
@@ -326,6 +326,42 @@ static int decode_slice(AVCodecContext *c, void *arg)
326 326
     int width, height, x, y, ret;
327 327
     const int ps      = av_pix_fmt_desc_get(c->pix_fmt)->comp[0].step_minus1 + 1;
328 328
     AVFrame * const p = f->cur;
329
+    int i, si;
330
+
331
+    for( si=0; fs != f->slice_context[si]; si ++)
332
+        ;
333
+
334
+    if(f->fsrc && !p->key_frame)
335
+        ff_thread_await_progress(&f->last_picture, si, 0);
336
+
337
+    if(f->fsrc && !p->key_frame) {
338
+        FFV1Context *fssrc = f->fsrc->slice_context[si];
339
+        FFV1Context *fsdst = f->slice_context[si];
340
+        av_assert1(fsdst->plane_count == fssrc->plane_count);
341
+        av_assert1(fsdst == fs);
342
+
343
+        if (!p->key_frame)
344
+            fsdst->slice_damaged |= fssrc->slice_damaged;
345
+
346
+        for (i = 0; i < f->plane_count; i++) {
347
+            PlaneContext *psrc = &fssrc->plane[i];
348
+            PlaneContext *pdst = &fsdst->plane[i];
349
+
350
+            av_free(pdst->state);
351
+            av_free(pdst->vlc_state);
352
+            memcpy(pdst, psrc, sizeof(*pdst));
353
+            pdst->state = NULL;
354
+            pdst->vlc_state = NULL;
355
+
356
+            if (fssrc->ac) {
357
+                pdst->state = av_malloc(CONTEXT_SIZE * psrc->context_count);
358
+                memcpy(pdst->state, psrc->state, CONTEXT_SIZE * psrc->context_count);
359
+            } else {
360
+                pdst->vlc_state = av_malloc(sizeof(*pdst->vlc_state) * psrc->context_count);
361
+                memcpy(pdst->vlc_state, psrc->vlc_state, sizeof(*pdst->vlc_state) * psrc->context_count);
362
+            }
363
+        }
364
+    }
329 365
 
330 366
     if (f->version > 2) {
331 367
         if (ffv1_init_slice_state(f, fs) < 0)
... ...
@@ -386,6 +422,8 @@ static int decode_slice(AVCodecContext *c, void *arg)
386 386
 
387 387
     emms_c();
388 388
 
389
+    ff_thread_report_progress(&f->picture, si, 0);
390
+
389 391
     return 0;
390 392
 }
391 393
 
... ...
@@ -724,6 +762,8 @@ static av_cold int decode_init(AVCodecContext *avctx)
724 724
     if ((ret = ffv1_init_slice_contexts(f)) < 0)
725 725
         return ret;
726 726
 
727
+    avctx->internal->allocate_progress = 1;
728
+
727 729
     return 0;
728 730
 }
729 731
 
... ...
@@ -744,6 +784,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
744 744
 
745 745
     f->cur = p = f->picture.f;
746 746
 
747
+    f->avctx = avctx;
747 748
     ff_init_range_decoder(c, buf, buf_size);
748 749
     ff_build_rac_states(c, 0.05 * (1LL << 32), 256 - 8);
749 750
 
... ...
@@ -770,6 +811,8 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
770 770
         av_log(avctx, AV_LOG_DEBUG, "ver:%d keyframe:%d coder:%d ec:%d slices:%d bps:%d\n",
771 771
                f->version, p->key_frame, f->ac, f->ec, f->slice_count, f->avctx->bits_per_raw_sample);
772 772
 
773
+    ff_thread_finish_setup(avctx);
774
+
773 775
     buf_p = buf + buf_size;
774 776
     for (i = f->slice_count - 1; i >= 0; i--) {
775 777
         FFV1Context *fs = f->slice_context[i];
... ...
@@ -822,6 +865,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
822 822
         if (fs->slice_damaged && f->last_picture.f->data[0]) {
823 823
             const uint8_t *src[4];
824 824
             uint8_t *dst[4];
825
+            ff_thread_await_progress(&f->last_picture, INT_MAX, 0);
825 826
             for (j = 0; j < 4; j++) {
826 827
                 int sh = (j==1 || j==2) ? f->chroma_h_shift : 0;
827 828
                 int sv = (j==1 || j==2) ? f->chroma_v_shift : 0;
... ...
@@ -837,6 +881,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
837 837
                           fs->slice_height);
838 838
         }
839 839
     }
840
+    ff_thread_report_progress(&f->picture, INT_MAX, 0);
840 841
 
841 842
     f->picture_number++;
842 843
 
... ...
@@ -854,18 +899,58 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
854 854
 static int init_thread_copy(AVCodecContext *avctx)
855 855
 {
856 856
     FFV1Context *f = avctx->priv_data;
857
-    int ret, i;
858 857
 
859
-    for (i = 0; i < f->quant_table_count; i++) {
860
-        void *p = f->initial_states[i];
861
-        f->initial_states[i] = av_malloc(f->context_count[i] * sizeof(*f->initial_states[i]));
862
-        if (!f->initial_states[i])
863
-            return AVERROR(ENOMEM);
864
-        memcpy(f->initial_states[i], p, f->context_count[i] * sizeof(*f->initial_states[i]));
858
+    f->picture.f      = NULL;
859
+    f->last_picture.f = NULL;
860
+    f->sample_buffer  = NULL;
861
+    f->quant_table_count = 0;
862
+    f->slice_count = 0;
863
+
864
+    return 0;
865
+}
866
+
867
+static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
868
+{
869
+    FFV1Context *fsrc = src->priv_data;
870
+    FFV1Context *fdst = dst->priv_data;
871
+    int i, ret;
872
+
873
+    if (dst == src)
874
+        return 0;
875
+
876
+    if (!fdst->quant_table_count) {
877
+        memcpy(fdst, fsrc, sizeof(*fdst));
878
+
879
+        for (i = 0; i < fdst->quant_table_count; i++) {
880
+            fdst->initial_states[i] = av_malloc(fdst->context_count[i] * sizeof(*fdst->initial_states[i]));
881
+            memcpy(fdst->initial_states[i], fsrc->initial_states[i], fdst->context_count[i] * sizeof(*fdst->initial_states[i]));
882
+        }
883
+
884
+        fdst->picture.f      = av_frame_alloc();
885
+        fdst->last_picture.f = av_frame_alloc();
886
+
887
+        if ((ret = ffv1_init_slice_contexts(fdst)) < 0)
888
+            return ret;
865 889
     }
866 890
 
867
-    if ((ret = ffv1_init_slice_contexts(f)) < 0)
868
-        return ret;
891
+    av_assert1(fdst->slice_count == fsrc->slice_count);
892
+
893
+    fdst->key_frame_ok = fsrc->key_frame_ok;
894
+
895
+    ff_thread_release_buffer(dst, &fdst->picture);
896
+    if (fsrc->picture.f->data[0]) {
897
+        if ((ret = ff_thread_ref_frame(&fdst->picture, &fsrc->picture)) < 0)
898
+            return ret;
899
+    }
900
+    for (i = 0; i < fdst->slice_count; i++) {
901
+        FFV1Context *fsdst = fdst->slice_context[i];
902
+        FFV1Context *fssrc = fsrc->slice_context[i];
903
+
904
+        fsdst->slice_damaged = fssrc->slice_damaged;
905
+    }
906
+
907
+    fdst->fsrc = fsrc;
908
+
869 909
     return 0;
870 910
 }
871 911
 
... ...
@@ -878,7 +963,8 @@ AVCodec ff_ffv1_decoder = {
878 878
     .close          = ffv1_close,
879 879
     .decode         = decode_frame,
880 880
     .init_thread_copy = init_thread_copy,
881
+    .update_thread_context = update_thread_context,
881 882
     .capabilities   = CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/ |
882
-                      CODEC_CAP_SLICE_THREADS,
883
+                      CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
883 884
     .long_name      = NULL_IF_CONFIG_SMALL("FFmpeg video codec #1"),
884 885
 };
... ...
@@ -652,7 +652,7 @@ int ff_thread_decode_frame(AVCodecContext *avctx,
652 652
      */
653 653
 
654 654
     if (fctx->delaying) {
655
-        if (fctx->next_decoding >= (avctx->thread_count-1)) fctx->delaying = 0;
655
+        if (fctx->next_decoding >= (avctx->thread_count-1-(avctx->codec_id == AV_CODEC_ID_FFV1))) fctx->delaying = 0;
656 656
 
657 657
         *got_picture_ptr=0;
658 658
         if (avpkt->size)