Frame threading is about 20-30% faster than slice threading.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
... | ... |
@@ -326,6 +326,42 @@ static int decode_slice(AVCodecContext *c, void *arg) |
326 | 326 |
int width, height, x, y, ret; |
327 | 327 |
const int ps = av_pix_fmt_desc_get(c->pix_fmt)->comp[0].step_minus1 + 1; |
328 | 328 |
AVFrame * const p = f->cur; |
329 |
+ int i, si; |
|
330 |
+ |
|
331 |
+ for( si=0; fs != f->slice_context[si]; si ++) |
|
332 |
+ ; |
|
333 |
+ |
|
334 |
+ if(f->fsrc && !p->key_frame) |
|
335 |
+ ff_thread_await_progress(&f->last_picture, si, 0); |
|
336 |
+ |
|
337 |
+ if(f->fsrc && !p->key_frame) { |
|
338 |
+ FFV1Context *fssrc = f->fsrc->slice_context[si]; |
|
339 |
+ FFV1Context *fsdst = f->slice_context[si]; |
|
340 |
+ av_assert1(fsdst->plane_count == fssrc->plane_count); |
|
341 |
+ av_assert1(fsdst == fs); |
|
342 |
+ |
|
343 |
+ if (!p->key_frame) |
|
344 |
+ fsdst->slice_damaged |= fssrc->slice_damaged; |
|
345 |
+ |
|
346 |
+ for (i = 0; i < f->plane_count; i++) { |
|
347 |
+ PlaneContext *psrc = &fssrc->plane[i]; |
|
348 |
+ PlaneContext *pdst = &fsdst->plane[i]; |
|
349 |
+ |
|
350 |
+ av_free(pdst->state); |
|
351 |
+ av_free(pdst->vlc_state); |
|
352 |
+ memcpy(pdst, psrc, sizeof(*pdst)); |
|
353 |
+ pdst->state = NULL; |
|
354 |
+ pdst->vlc_state = NULL; |
|
355 |
+ |
|
356 |
+ if (fssrc->ac) { |
|
357 |
+ pdst->state = av_malloc(CONTEXT_SIZE * psrc->context_count); |
|
358 |
+ memcpy(pdst->state, psrc->state, CONTEXT_SIZE * psrc->context_count); |
|
359 |
+ } else { |
|
360 |
+ pdst->vlc_state = av_malloc(sizeof(*pdst->vlc_state) * psrc->context_count); |
|
361 |
+ memcpy(pdst->vlc_state, psrc->vlc_state, sizeof(*pdst->vlc_state) * psrc->context_count); |
|
362 |
+ } |
|
363 |
+ } |
|
364 |
+ } |
|
329 | 365 |
|
330 | 366 |
if (f->version > 2) { |
331 | 367 |
if (ffv1_init_slice_state(f, fs) < 0) |
... | ... |
@@ -386,6 +422,8 @@ static int decode_slice(AVCodecContext *c, void *arg) |
386 | 386 |
|
387 | 387 |
emms_c(); |
388 | 388 |
|
389 |
+ ff_thread_report_progress(&f->picture, si, 0); |
|
390 |
+ |
|
389 | 391 |
return 0; |
390 | 392 |
} |
391 | 393 |
|
... | ... |
@@ -724,6 +762,8 @@ static av_cold int decode_init(AVCodecContext *avctx) |
724 | 724 |
if ((ret = ffv1_init_slice_contexts(f)) < 0) |
725 | 725 |
return ret; |
726 | 726 |
|
727 |
+ avctx->internal->allocate_progress = 1; |
|
728 |
+ |
|
727 | 729 |
return 0; |
728 | 730 |
} |
729 | 731 |
|
... | ... |
@@ -744,6 +784,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac |
744 | 744 |
|
745 | 745 |
f->cur = p = f->picture.f; |
746 | 746 |
|
747 |
+ f->avctx = avctx; |
|
747 | 748 |
ff_init_range_decoder(c, buf, buf_size); |
748 | 749 |
ff_build_rac_states(c, 0.05 * (1LL << 32), 256 - 8); |
749 | 750 |
|
... | ... |
@@ -770,6 +811,8 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac |
770 | 770 |
av_log(avctx, AV_LOG_DEBUG, "ver:%d keyframe:%d coder:%d ec:%d slices:%d bps:%d\n", |
771 | 771 |
f->version, p->key_frame, f->ac, f->ec, f->slice_count, f->avctx->bits_per_raw_sample); |
772 | 772 |
|
773 |
+ ff_thread_finish_setup(avctx); |
|
774 |
+ |
|
773 | 775 |
buf_p = buf + buf_size; |
774 | 776 |
for (i = f->slice_count - 1; i >= 0; i--) { |
775 | 777 |
FFV1Context *fs = f->slice_context[i]; |
... | ... |
@@ -822,6 +865,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac |
822 | 822 |
if (fs->slice_damaged && f->last_picture.f->data[0]) { |
823 | 823 |
const uint8_t *src[4]; |
824 | 824 |
uint8_t *dst[4]; |
825 |
+ ff_thread_await_progress(&f->last_picture, INT_MAX, 0); |
|
825 | 826 |
for (j = 0; j < 4; j++) { |
826 | 827 |
int sh = (j==1 || j==2) ? f->chroma_h_shift : 0; |
827 | 828 |
int sv = (j==1 || j==2) ? f->chroma_v_shift : 0; |
... | ... |
@@ -837,6 +881,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac |
837 | 837 |
fs->slice_height); |
838 | 838 |
} |
839 | 839 |
} |
840 |
+ ff_thread_report_progress(&f->picture, INT_MAX, 0); |
|
840 | 841 |
|
841 | 842 |
f->picture_number++; |
842 | 843 |
|
... | ... |
@@ -854,18 +899,58 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac |
854 | 854 |
static int init_thread_copy(AVCodecContext *avctx) |
855 | 855 |
{ |
856 | 856 |
FFV1Context *f = avctx->priv_data; |
857 |
- int ret, i; |
|
858 | 857 |
|
859 |
- for (i = 0; i < f->quant_table_count; i++) { |
|
860 |
- void *p = f->initial_states[i]; |
|
861 |
- f->initial_states[i] = av_malloc(f->context_count[i] * sizeof(*f->initial_states[i])); |
|
862 |
- if (!f->initial_states[i]) |
|
863 |
- return AVERROR(ENOMEM); |
|
864 |
- memcpy(f->initial_states[i], p, f->context_count[i] * sizeof(*f->initial_states[i])); |
|
858 |
+ f->picture.f = NULL; |
|
859 |
+ f->last_picture.f = NULL; |
|
860 |
+ f->sample_buffer = NULL; |
|
861 |
+ f->quant_table_count = 0; |
|
862 |
+ f->slice_count = 0; |
|
863 |
+ |
|
864 |
+ return 0; |
|
865 |
+} |
|
866 |
+ |
|
867 |
+static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src) |
|
868 |
+{ |
|
869 |
+ FFV1Context *fsrc = src->priv_data; |
|
870 |
+ FFV1Context *fdst = dst->priv_data; |
|
871 |
+ int i, ret; |
|
872 |
+ |
|
873 |
+ if (dst == src) |
|
874 |
+ return 0; |
|
875 |
+ |
|
876 |
+ if (!fdst->quant_table_count) { |
|
877 |
+ memcpy(fdst, fsrc, sizeof(*fdst)); |
|
878 |
+ |
|
879 |
+ for (i = 0; i < fdst->quant_table_count; i++) { |
|
880 |
+ fdst->initial_states[i] = av_malloc(fdst->context_count[i] * sizeof(*fdst->initial_states[i])); |
|
881 |
+ memcpy(fdst->initial_states[i], fsrc->initial_states[i], fdst->context_count[i] * sizeof(*fdst->initial_states[i])); |
|
882 |
+ } |
|
883 |
+ |
|
884 |
+ fdst->picture.f = av_frame_alloc(); |
|
885 |
+ fdst->last_picture.f = av_frame_alloc(); |
|
886 |
+ |
|
887 |
+ if ((ret = ffv1_init_slice_contexts(fdst)) < 0) |
|
888 |
+ return ret; |
|
865 | 889 |
} |
866 | 890 |
|
867 |
- if ((ret = ffv1_init_slice_contexts(f)) < 0) |
|
868 |
- return ret; |
|
891 |
+ av_assert1(fdst->slice_count == fsrc->slice_count); |
|
892 |
+ |
|
893 |
+ fdst->key_frame_ok = fsrc->key_frame_ok; |
|
894 |
+ |
|
895 |
+ ff_thread_release_buffer(dst, &fdst->picture); |
|
896 |
+ if (fsrc->picture.f->data[0]) { |
|
897 |
+ if ((ret = ff_thread_ref_frame(&fdst->picture, &fsrc->picture)) < 0) |
|
898 |
+ return ret; |
|
899 |
+ } |
|
900 |
+ for (i = 0; i < fdst->slice_count; i++) { |
|
901 |
+ FFV1Context *fsdst = fdst->slice_context[i]; |
|
902 |
+ FFV1Context *fssrc = fsrc->slice_context[i]; |
|
903 |
+ |
|
904 |
+ fsdst->slice_damaged = fssrc->slice_damaged; |
|
905 |
+ } |
|
906 |
+ |
|
907 |
+ fdst->fsrc = fsrc; |
|
908 |
+ |
|
869 | 909 |
return 0; |
870 | 910 |
} |
871 | 911 |
|
... | ... |
@@ -878,7 +963,8 @@ AVCodec ff_ffv1_decoder = { |
878 | 878 |
.close = ffv1_close, |
879 | 879 |
.decode = decode_frame, |
880 | 880 |
.init_thread_copy = init_thread_copy, |
881 |
+ .update_thread_context = update_thread_context, |
|
881 | 882 |
.capabilities = CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/ | |
882 |
- CODEC_CAP_SLICE_THREADS, |
|
883 |
+ CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS, |
|
883 | 884 |
.long_name = NULL_IF_CONFIG_SMALL("FFmpeg video codec #1"), |
884 | 885 |
}; |
... | ... |
@@ -652,7 +652,7 @@ int ff_thread_decode_frame(AVCodecContext *avctx, |
652 | 652 |
*/ |
653 | 653 |
|
654 | 654 |
if (fctx->delaying) { |
655 |
- if (fctx->next_decoding >= (avctx->thread_count-1)) fctx->delaying = 0; |
|
655 |
+ if (fctx->next_decoding >= (avctx->thread_count-1-(avctx->codec_id == AV_CODEC_ID_FFV1))) fctx->delaying = 0; |
|
656 | 656 |
|
657 | 657 |
*got_picture_ptr=0; |
658 | 658 |
if (avpkt->size) |