~5-15% faster overall when the main input has no alpha channel.
Signed-off-by: Paul B Mahol <onemda@gmail.com>
... | ... |
@@ -416,7 +416,7 @@ static int config_output(AVFilterLink *outlink) |
416 | 416 |
|
417 | 417 |
static void blend_image_packed_rgb(AVFilterContext *ctx, |
418 | 418 |
AVFrame *dst, const AVFrame *src, |
419 |
- int x, int y) |
|
419 |
+ int main_has_alpha, int x, int y) |
|
420 | 420 |
{ |
421 | 421 |
OverlayContext *s = ctx->priv; |
422 | 422 |
int i, imax, j, jmax; |
... | ... |
@@ -435,7 +435,6 @@ static void blend_image_packed_rgb(AVFilterContext *ctx, |
435 | 435 |
const int sb = s->overlay_rgba_map[B]; |
436 | 436 |
const int sa = s->overlay_rgba_map[A]; |
437 | 437 |
const int sstep = s->overlay_pix_step[0]; |
438 |
- const int main_has_alpha = s->main_has_alpha; |
|
439 | 438 |
uint8_t *S, *sp, *d, *dp; |
440 | 439 |
|
441 | 440 |
i = FFMAX(-y, 0); |
... | ... |
@@ -634,11 +633,11 @@ static av_always_inline void blend_image_yuv(AVFilterContext *ctx, |
634 | 634 |
s->main_desc->comp[2].plane, s->main_desc->comp[2].offset, s->main_desc->comp[2].step); |
635 | 635 |
} |
636 | 636 |
|
637 |
-static av_always_inline void blend_image_rgb(AVFilterContext *ctx, |
|
638 |
- AVFrame *dst, const AVFrame *src, |
|
639 |
- int hsub, int vsub, |
|
640 |
- int main_has_alpha, |
|
641 |
- int x, int y) |
|
637 |
+static av_always_inline void blend_image_planar_rgb(AVFilterContext *ctx, |
|
638 |
+ AVFrame *dst, const AVFrame *src, |
|
639 |
+ int hsub, int vsub, |
|
640 |
+ int main_has_alpha, |
|
641 |
+ int x, int y) |
|
642 | 642 |
{ |
643 | 643 |
OverlayContext *s = ctx->priv; |
644 | 644 |
const int src_w = src->width; |
... | ... |
@@ -659,30 +658,52 @@ static av_always_inline void blend_image_rgb(AVFilterContext *ctx, |
659 | 659 |
|
660 | 660 |
static void blend_image_yuv420(AVFilterContext *ctx, AVFrame *dst, const AVFrame *src, int x, int y) |
661 | 661 |
{ |
662 |
- OverlayContext *s = ctx->priv; |
|
662 |
+ blend_image_yuv(ctx, dst, src, 1, 1, 0, x, y); |
|
663 |
+} |
|
663 | 664 |
|
664 |
- blend_image_yuv(ctx, dst, src, 1, 1, s->main_has_alpha, x, y); |
|
665 |
+static void blend_image_yuva420(AVFilterContext *ctx, AVFrame *dst, const AVFrame *src, int x, int y) |
|
666 |
+{ |
|
667 |
+ blend_image_yuv(ctx, dst, src, 1, 1, 1, x, y); |
|
665 | 668 |
} |
666 | 669 |
|
667 | 670 |
static void blend_image_yuv422(AVFilterContext *ctx, AVFrame *dst, const AVFrame *src, int x, int y) |
668 | 671 |
{ |
669 |
- OverlayContext *s = ctx->priv; |
|
672 |
+ blend_image_yuv(ctx, dst, src, 1, 0, 0, x, y); |
|
673 |
+} |
|
670 | 674 |
|
671 |
- blend_image_yuv(ctx, dst, src, 1, 0, s->main_has_alpha, x, y); |
|
675 |
+static void blend_image_yuva422(AVFilterContext *ctx, AVFrame *dst, const AVFrame *src, int x, int y) |
|
676 |
+{ |
|
677 |
+ blend_image_yuv(ctx, dst, src, 1, 0, 1, x, y); |
|
672 | 678 |
} |
673 | 679 |
|
674 | 680 |
static void blend_image_yuv444(AVFilterContext *ctx, AVFrame *dst, const AVFrame *src, int x, int y) |
675 | 681 |
{ |
676 |
- OverlayContext *s = ctx->priv; |
|
682 |
+ blend_image_yuv(ctx, dst, src, 0, 0, 0, x, y); |
|
683 |
+} |
|
677 | 684 |
|
678 |
- blend_image_yuv(ctx, dst, src, 0, 0, s->main_has_alpha, x, y); |
|
685 |
+static void blend_image_yuva444(AVFilterContext *ctx, AVFrame *dst, const AVFrame *src, int x, int y) |
|
686 |
+{ |
|
687 |
+ blend_image_yuv(ctx, dst, src, 0, 0, 1, x, y); |
|
679 | 688 |
} |
680 | 689 |
|
681 | 690 |
static void blend_image_gbrp(AVFilterContext *ctx, AVFrame *dst, const AVFrame *src, int x, int y) |
682 | 691 |
{ |
683 |
- OverlayContext *s = ctx->priv; |
|
692 |
+ blend_image_planar_rgb(ctx, dst, src, 0, 0, 0, x, y); |
|
693 |
+} |
|
684 | 694 |
|
685 |
- blend_image_rgb(ctx, dst, src, 0, 0, s->main_has_alpha, x, y); |
|
695 |
+static void blend_image_gbrap(AVFilterContext *ctx, AVFrame *dst, const AVFrame *src, int x, int y) |
|
696 |
+{ |
|
697 |
+ blend_image_planar_rgb(ctx, dst, src, 0, 0, 1, x, y); |
|
698 |
+} |
|
699 |
+ |
|
700 |
+static void blend_image_rgb(AVFilterContext *ctx, AVFrame *dst, const AVFrame *src, int x, int y) |
|
701 |
+{ |
|
702 |
+ blend_image_packed_rgb(ctx, dst, src, 0, x, y); |
|
703 |
+} |
|
704 |
+ |
|
705 |
+static void blend_image_rgba(AVFilterContext *ctx, AVFrame *dst, const AVFrame *src, int x, int y) |
|
706 |
+{ |
|
707 |
+ blend_image_packed_rgb(ctx, dst, src, 1, x, y); |
|
686 | 708 |
} |
687 | 709 |
|
688 | 710 |
static int config_input_main(AVFilterLink *inlink) |
... | ... |
@@ -702,39 +723,39 @@ static int config_input_main(AVFilterLink *inlink) |
702 | 702 |
s->main_has_alpha = ff_fmt_is_in(inlink->format, alpha_pix_fmts); |
703 | 703 |
switch (s->format) { |
704 | 704 |
case OVERLAY_FORMAT_YUV420: |
705 |
- s->blend_image = blend_image_yuv420; |
|
705 |
+ s->blend_image = s->main_has_alpha ? blend_image_yuva420 : blend_image_yuv420; |
|
706 | 706 |
break; |
707 | 707 |
case OVERLAY_FORMAT_YUV422: |
708 |
- s->blend_image = blend_image_yuv422; |
|
708 |
+ s->blend_image = s->main_has_alpha ? blend_image_yuva422 : blend_image_yuv422; |
|
709 | 709 |
break; |
710 | 710 |
case OVERLAY_FORMAT_YUV444: |
711 |
- s->blend_image = blend_image_yuv444; |
|
711 |
+ s->blend_image = s->main_has_alpha ? blend_image_yuva444 : blend_image_yuv444; |
|
712 | 712 |
break; |
713 | 713 |
case OVERLAY_FORMAT_RGB: |
714 |
- s->blend_image = blend_image_packed_rgb; |
|
714 |
+ s->blend_image = s->main_has_alpha ? blend_image_rgba : blend_image_rgb; |
|
715 | 715 |
break; |
716 | 716 |
case OVERLAY_FORMAT_GBRP: |
717 |
- s->blend_image = blend_image_gbrp; |
|
717 |
+ s->blend_image = s->main_has_alpha ? blend_image_gbrap : blend_image_gbrp; |
|
718 | 718 |
break; |
719 | 719 |
case OVERLAY_FORMAT_AUTO: |
720 | 720 |
switch (inlink->format) { |
721 | 721 |
case AV_PIX_FMT_YUVA420P: |
722 |
- s->blend_image = blend_image_yuv420; |
|
722 |
+ s->blend_image = blend_image_yuva420; |
|
723 | 723 |
break; |
724 | 724 |
case AV_PIX_FMT_YUVA422P: |
725 |
- s->blend_image = blend_image_yuv422; |
|
725 |
+ s->blend_image = blend_image_yuva422; |
|
726 | 726 |
break; |
727 | 727 |
case AV_PIX_FMT_YUVA444P: |
728 |
- s->blend_image = blend_image_yuv444; |
|
728 |
+ s->blend_image = blend_image_yuva444; |
|
729 | 729 |
break; |
730 | 730 |
case AV_PIX_FMT_ARGB: |
731 | 731 |
case AV_PIX_FMT_RGBA: |
732 | 732 |
case AV_PIX_FMT_BGRA: |
733 | 733 |
case AV_PIX_FMT_ABGR: |
734 |
- s->blend_image = blend_image_packed_rgb; |
|
734 |
+ s->blend_image = blend_image_rgba; |
|
735 | 735 |
break; |
736 | 736 |
case AV_PIX_FMT_GBRAP: |
737 |
- s->blend_image = blend_image_gbrp; |
|
737 |
+ s->blend_image = blend_image_gbrap; |
|
738 | 738 |
break; |
739 | 739 |
default: |
740 | 740 |
av_assert0(0); |