Signed-off-by: Paul B Mahol <onemda@gmail.com>
Paul B Mahol authored on 2015/10/04 18:34:031 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,36 @@ |
0 |
+/* |
|
1 |
+ * Copyright (c) 2015 Paul B Mahol |
|
2 |
+ * |
|
3 |
+ * This file is part of FFmpeg. |
|
4 |
+ * |
|
5 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
7 |
+ * License as published by the Free Software Foundation; either |
|
8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
9 |
+ * |
|
10 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
13 |
+ * Lesser General Public License for more details. |
|
14 |
+ * |
|
15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
16 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
18 |
+ */ |
|
19 |
+ |
|
20 |
+#ifndef LIBAVFILTER_STEREO3D_H |
|
21 |
+#define LIBAVFILTER_STEREO3D_H |
|
22 |
+ |
|
23 |
+#include <stddef.h> |
|
24 |
+#include <stdint.h> |
|
25 |
+ |
|
26 |
/**
 * Table of DSP routines used by the stereo3d filter.
 *
 * The filter calls the anaglyph routine through this table, so the default
 * C implementation can be swapped for a platform-specific one.
 */
typedef struct Stereo3DDSPContext {
    /**
     * Build an anaglyph image from a left and a right view.
     *
     * @param dst           first row of the output buffer
     * @param lsrc          first row of the left-view input
     * @param rsrc          first row of the right-view input
     * @param dst_linesize  byte step between consecutive rows of dst
     * @param l_linesize    byte step between consecutive rows of lsrc
     * @param r_linesize    byte step between consecutive rows of rsrc
     * @param width         number of pixels in each row
     * @param height        number of rows to process
     * @param ana_matrix_r  coefficients producing the first output component
     * @param ana_matrix_g  coefficients producing the second output component
     * @param ana_matrix_b  coefficients producing the third output component
     */
    void (*anaglyph)(uint8_t *dst,
                     uint8_t *lsrc,
                     uint8_t *rsrc,
                     ptrdiff_t dst_linesize,
                     ptrdiff_t l_linesize,
                     ptrdiff_t r_linesize,
                     int width,
                     int height,
                     const int *ana_matrix_r,
                     const int *ana_matrix_g,
                     const int *ana_matrix_b);
} Stereo3DDSPContext;
|
32 |
+ |
|
33 |
+void ff_stereo3d_init_x86(Stereo3DDSPContext *dsp); |
|
34 |
+ |
|
35 |
+#endif /* LIBAVFILTER_STEREO3D_H */ |
... | ... |
@@ -30,6 +30,7 @@ |
30 | 30 |
#include "formats.h" |
31 | 31 |
#include "internal.h" |
32 | 32 |
#include "video.h" |
33 |
+#include "stereo3d.h" |
|
33 | 34 |
|
34 | 35 |
enum StereoCode { |
35 | 36 |
ANAGLYPH_RC_GRAY, // anaglyph red/cyan gray |
... | ... |
@@ -150,6 +151,7 @@ typedef struct Stereo3DContext { |
150 | 150 |
double ts_unit; |
151 | 151 |
int blanks; |
152 | 152 |
int in_off_left[4], in_off_right[4]; |
153 |
+ Stereo3DDSPContext dsp; |
|
153 | 154 |
} Stereo3DContext; |
154 | 155 |
|
155 | 156 |
#define OFFSET(x) offsetof(Stereo3DContext, x) |
... | ... |
@@ -300,6 +302,37 @@ static int query_formats(AVFilterContext *ctx) |
300 | 300 |
return ff_set_common_formats(ctx, fmts_list); |
301 | 301 |
} |
302 | 302 |
|
303 |
/**
 * Compute one output component of an anaglyph pixel.
 *
 * coeff holds six weights: entries 0..2 are applied to the red, green and
 * blue inputs of the left pixel, entries 3..5 to the same inputs of the
 * right pixel. The weighted sum is shifted right by 16 bits and then passed
 * through av_clip_uint8() to obtain the 8-bit result.
 *
 * @param coeff  six weights for this output component
 * @param left   three consecutive components of the left pixel
 * @param right  three consecutive components of the right pixel
 * @return the converted component
 */
static inline uint8_t ana_convert(const int *coeff, const uint8_t *left, const uint8_t *right)
{
    int acc = 0;
    int c;

    /* c = 0, 1, 2: red, green, blue input */
    for (c = 0; c < 3; c++)
        acc += coeff[c] * left[c] + coeff[c + 3] * right[c];

    return av_clip_uint8(acc >> 16);
}
|
313 |
+ |
|
314 |
/**
 * Reference C implementation of the anaglyph conversion.
 *
 * Walks both inputs and the output row by row; every pixel occupies three
 * consecutive bytes. Each output byte of a pixel is produced by
 * ana_convert() from the left and right pixel at the same position, using
 * ana_matrix_r, ana_matrix_g and ana_matrix_b for the first, second and
 * third byte respectively.
 *
 * @param dst           first output row
 * @param lsrc          first row of the left view
 * @param rsrc          first row of the right view
 * @param dst_linesize  byte step between output rows
 * @param l_linesize    byte step between left-view rows
 * @param r_linesize    byte step between right-view rows
 * @param width         pixels per row
 * @param height        number of rows
 */
static void anaglyph(uint8_t *dst, uint8_t *lsrc, uint8_t *rsrc,
                     ptrdiff_t dst_linesize, ptrdiff_t l_linesize, ptrdiff_t r_linesize,
                     int width, int height,
                     const int *ana_matrix_r, const int *ana_matrix_g, const int *ana_matrix_b)
{
    int row;

    for (row = 0; row < height; row++) {
        const uint8_t *left  = lsrc;
        const uint8_t *right = rsrc;
        uint8_t *out         = dst;
        int col;

        for (col = 0; col < width; col++) {
            out[0] = ana_convert(ana_matrix_r, left, right);
            out[1] = ana_convert(ana_matrix_g, left, right);
            out[2] = ana_convert(ana_matrix_b, left, right);

            /* step all three cursors to the next 3-byte pixel */
            out   += 3;
            left  += 3;
            right += 3;
        }

        dst  += dst_linesize;
        lsrc += l_linesize;
        rsrc += r_linesize;
    }
}
|
333 |
+ |
|
303 | 334 |
static int config_output(AVFilterLink *outlink) |
304 | 335 |
{ |
305 | 336 |
AVFilterContext *ctx = outlink->src; |
... | ... |
@@ -517,38 +550,11 @@ static int config_output(AVFilterLink *outlink) |
517 | 517 |
s->hsub = desc->log2_chroma_w; |
518 | 518 |
s->vsub = desc->log2_chroma_h; |
519 | 519 |
|
520 |
- return 0; |
|
521 |
-} |
|
522 |
- |
|
523 |
-static inline uint8_t ana_convert(const int *coeff, const uint8_t *left, const uint8_t *right) |
|
524 |
-{ |
|
525 |
- int sum; |
|
526 |
- |
|
527 |
- sum = coeff[0] * left[0] + coeff[3] * right[0]; //red in |
|
528 |
- sum += coeff[1] * left[1] + coeff[4] * right[1]; //green in |
|
529 |
- sum += coeff[2] * left[2] + coeff[5] * right[2]; //blue in |
|
530 |
- |
|
531 |
- return av_clip_uint8(sum >> 16); |
|
532 |
-} |
|
520 |
+ s->dsp.anaglyph = anaglyph; |
|
521 |
+ if (ARCH_X86) |
|
522 |
+ ff_stereo3d_init_x86(&s->dsp); |
|
533 | 523 |
|
534 |
-static void anaglyph(uint8_t *dst, uint8_t *lsrc, uint8_t *rsrc, |
|
535 |
- ptrdiff_t dst_linesize, ptrdiff_t l_linesize, ptrdiff_t r_linesize, |
|
536 |
- int width, int height, |
|
537 |
- const int *ana_matrix_r, const int *ana_matrix_g, const int *ana_matrix_b) |
|
538 |
-{ |
|
539 |
- int x, y, o; |
|
540 |
- |
|
541 |
- for (y = 0; y < height; y++) { |
|
542 |
- for (o = 0, x = 0; x < width; x++, o+= 3) { |
|
543 |
- dst[o ] = ana_convert(ana_matrix_r, lsrc + o, rsrc + o); |
|
544 |
- dst[o + 1] = ana_convert(ana_matrix_g, lsrc + o, rsrc + o); |
|
545 |
- dst[o + 2] = ana_convert(ana_matrix_b, lsrc + o, rsrc + o); |
|
546 |
- } |
|
547 |
- |
|
548 |
- dst += dst_linesize; |
|
549 |
- lsrc += l_linesize; |
|
550 |
- rsrc += r_linesize; |
|
551 |
- } |
|
524 |
+ return 0; |
|
552 | 525 |
} |
553 | 526 |
|
554 | 527 |
typedef struct ThreadData { |
... | ... |
@@ -568,7 +574,7 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) |
568 | 568 |
int end = (height * (jobnr+1)) / nb_jobs; |
569 | 569 |
const int **ana_matrix = s->ana_matrix; |
570 | 570 |
|
571 |
- anaglyph(out->data[0] + out->linesize[0] * start, |
|
571 |
+ s->dsp.anaglyph(out->data[0] + out->linesize[0] * start, |
|
572 | 572 |
ileft ->data[0] + s->in_off_left [0] + ileft->linesize[0] * start * s->in.row_step, |
573 | 573 |
iright->data[0] + s->in_off_right[0] + iright->linesize[0] * start * s->in.row_step, |
574 | 574 |
out->linesize[0], |
... | ... |
@@ -13,6 +13,7 @@ OBJS-$(CONFIG_PULLUP_FILTER) += x86/vf_pullup_init.o |
13 | 13 |
OBJS-$(CONFIG_REMOVEGRAIN_FILTER) += x86/vf_removegrain_init.o |
14 | 14 |
OBJS-$(CONFIG_SPP_FILTER) += x86/vf_spp.o |
15 | 15 |
OBJS-$(CONFIG_SSIM_FILTER) += x86/vf_ssim_init.o |
16 |
+OBJS-$(CONFIG_STEREO3D_FILTER) += x86/vf_stereo3d_init.o |
|
16 | 17 |
OBJS-$(CONFIG_TBLEND_FILTER) += x86/vf_blend_init.o |
17 | 18 |
OBJS-$(CONFIG_TINTERLACE_FILTER) += x86/vf_tinterlace_init.o |
18 | 19 |
OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume_init.o |
... | ... |
@@ -32,6 +33,7 @@ ifdef CONFIG_GPL |
32 | 32 |
YASM-OBJS-$(CONFIG_REMOVEGRAIN_FILTER) += x86/vf_removegrain.o |
33 | 33 |
endif |
34 | 34 |
YASM-OBJS-$(CONFIG_SSIM_FILTER) += x86/vf_ssim.o |
35 |
+YASM-OBJS-$(CONFIG_STEREO3D_FILTER) += x86/vf_stereo3d.o |
|
35 | 36 |
YASM-OBJS-$(CONFIG_TBLEND_FILTER) += x86/vf_blend.o |
36 | 37 |
YASM-OBJS-$(CONFIG_TINTERLACE_FILTER) += x86/vf_interlace.o |
37 | 38 |
YASM-OBJS-$(CONFIG_VOLUME_FILTER) += x86/af_volume.o |
38 | 39 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,167 @@ |
0 |
+;***************************************************************************** |
|
1 |
+;* x86-optimized functions for stereo3d filter |
|
2 |
+;* |
|
3 |
+;* Copyright (C) 2015 Paul B Mahol |
|
4 |
+;* |
|
5 |
+;* This file is part of FFmpeg. |
|
6 |
+;* |
|
7 |
+;* FFmpeg is free software; you can redistribute it and/or |
|
8 |
+;* modify it under the terms of the GNU Lesser General Public |
|
9 |
+;* License as published by the Free Software Foundation; either |
|
10 |
+;* version 2.1 of the License, or (at your option) any later version. |
|
11 |
+;* |
|
12 |
+;* FFmpeg is distributed in the hope that it will be useful, |
|
13 |
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
14 |
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
15 |
+;* Lesser General Public License for more details. |
|
16 |
+;* |
|
17 |
+;* You should have received a copy of the GNU Lesser General Public |
|
18 |
+;* License along with FFmpeg; if not, write to the Free Software |
|
19 |
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
20 |
+;***************************************************************************** |
|
21 |
+ |
|
22 |
+%include "libavutil/x86/x86util.asm" |
|
23 |
+ |
|
24 |
+%if ARCH_X86_64 |
|
25 |
+ |
|
26 |
+SECTION_RODATA |
|
27 |
+ |
|
28 |
+; rgbrgbrgbrgb |
|
29 |
+; rrrrggggbbbb |
|
30 |
+ |
|
31 |
+shuf: db 0, 4, 8, 1,5, 9, 2, 6,10,3, 7,11,-1,-1,-1,-1 |
|
32 |
+ex_r: db 0,-1,-1,-1,3,-1,-1,-1,6,-1,-1,-1, 9,-1,-1,-1 |
|
33 |
+ex_g: db 1,-1,-1,-1,4,-1,-1,-1,7,-1,-1,-1,10,-1,-1,-1 |
|
34 |
+ex_b: db 2,-1,-1,-1,5,-1,-1,-1,8,-1,-1,-1,11,-1,-1,-1 |
|
35 |
+ |
|
36 |
+SECTION .text |
|
37 |
+ |
|
38 |
+INIT_XMM sse4 |
|
39 |
; anaglyph(dst, lsrc, rsrc, dst_linesize, l_linesize, r_linesize,
;          width, height, ana_matrix_r, ana_matrix_g, ana_matrix_b)
;
; Each pass of .loop loads 16 bytes from both sources at byte offset o,
; forms three weighted sums (one per coefficient matrix) in four dword
; lanes, and stores 12 bytes to dst. The offset grows by 12 and the pixel
; counter drops by 4 per pass, i.e. four 3-byte pixels are handled at once.
;
; The stack area holds the broadcast coefficients of ana_matrix_r
; (slots 0-5) and ana_matrix_g (slots 6-11); those of ana_matrix_b stay
; in m10-m15 for the whole function.
;
; NOTE(review): the 16-byte loads and 12-byte stores are not limited by
; width, so a width that is not a multiple of 4 is still processed as a
; full group of four pixels - confirm callers provide enough padding.
+cglobal anaglyph, 11, 13, 16, 2*6*mmsize, dst, lsrc, rsrc, dst_linesize, l_linesize, r_linesize, width, height, ana_matrix_r, ana_matrix_g, ana_matrix_b |
|
; Load the six ana_matrix_r coefficients, broadcast each one to all four
; dword lanes and spill them to stack slots 0-5.
40 |
+ movu m13, [ana_matrix_rq+ 0] |
|
41 |
+ movq m15, [ana_matrix_rq+16] |
|
42 |
+ pshufd m10, m13, q0000 |
|
43 |
+ pshufd m11, m13, q1111 |
|
44 |
+ pshufd m12, m13, q2222 |
|
45 |
+ pshufd m13, m13, q3333 |
|
46 |
+ pshufd m14, m15, q0000 |
|
47 |
+ pshufd m15, m15, q1111 |
|
48 |
+ mova [rsp+mmsize*0], m10 |
|
49 |
+ mova [rsp+mmsize*1], m11 |
|
50 |
+ mova [rsp+mmsize*2], m12 |
|
51 |
+ mova [rsp+mmsize*3], m13 |
|
52 |
+ mova [rsp+mmsize*4], m14 |
|
53 |
+ mova [rsp+mmsize*5], m15 |
|
54 |
+ |
|
; Same for ana_matrix_g, spilled to stack slots 6-11.
55 |
+ movu m13, [ana_matrix_gq+ 0] |
|
56 |
+ movq m15, [ana_matrix_gq+16] |
|
57 |
+ pshufd m10, m13, q0000 |
|
58 |
+ pshufd m11, m13, q1111 |
|
59 |
+ pshufd m12, m13, q2222 |
|
60 |
+ pshufd m13, m13, q3333 |
|
61 |
+ pshufd m14, m15, q0000 |
|
62 |
+ pshufd m15, m15, q1111 |
|
63 |
+ mova [rsp+mmsize*6 ], m10 |
|
64 |
+ mova [rsp+mmsize*7 ], m11 |
|
65 |
+ mova [rsp+mmsize*8 ], m12 |
|
66 |
+ mova [rsp+mmsize*9 ], m13 |
|
67 |
+ mova [rsp+mmsize*10], m14 |
|
68 |
+ mova [rsp+mmsize*11], m15 |
|
69 |
+ |
|
; ana_matrix_b coefficients are broadcast the same way but are not
; spilled: they remain in m10-m15.
70 |
+ movu m13, [ana_matrix_bq+ 0] |
|
71 |
+ movq m15, [ana_matrix_bq+16] |
|
72 |
+ pshufd m10, m13, q0000 |
|
73 |
+ pshufd m11, m13, q1111 |
|
74 |
+ pshufd m12, m13, q2222 |
|
75 |
+ pshufd m13, m13, q3333 |
|
76 |
+ pshufd m14, m15, q0000 |
|
77 |
+ pshufd m15, m15, q1111 |
|
; Row setup: r11 counts the pixels left in the row, r12 (aliased as o)
; is the byte offset into the current row.
78 |
+.nextrow: |
|
79 |
+ mov r11q, widthq |
|
80 |
+ mov r12q, 0 |
|
81 |
+ %define o r12q |
|
82 |
+ |
|
83 |
+ .loop: |
|
; First sum -> m1, using the coefficients in stack slots 0-5.
; ex_r / ex_g / ex_b pick source bytes 0,3,6,9 / 1,4,7,10 / 2,5,8,11 and
; zero the other bytes, giving one zero-extended dword per pixel.
84 |
+ movu m0, [lsrcq+o+0] |
|
85 |
+ pshufb m1, m0, [ex_r] |
|
86 |
+ pshufb m2, m0, [ex_g] |
|
87 |
+ pshufb m3, m0, [ex_b] |
|
88 |
+ movu m0, [rsrcq+o+0] |
|
89 |
+ pshufb m4, m0, [ex_r] |
|
90 |
+ pshufb m5, m0, [ex_g] |
|
91 |
+ pshufb m6, m0, [ex_b] |
|
92 |
+ pmulld m1, [rsp+mmsize*0] |
|
93 |
+ pmulld m2, [rsp+mmsize*1] |
|
94 |
+ pmulld m3, [rsp+mmsize*2] |
|
95 |
+ pmulld m4, [rsp+mmsize*3] |
|
96 |
+ pmulld m5, [rsp+mmsize*4] |
|
97 |
+ pmulld m6, [rsp+mmsize*5] |
|
98 |
+ paddd m1, m2 |
|
99 |
+ paddd m3, m4 |
|
100 |
+ paddd m5, m6 |
|
101 |
+ paddd m1, m3 |
|
102 |
+ paddd m1, m5 |
|
103 |
+ |
|
; Second sum -> m7, using stack slots 6-11. The same source bytes are
; loaded and expanded again.
104 |
+ movu m0, [lsrcq+o+0] |
|
105 |
+ pshufb m7, m0, [ex_r] |
|
106 |
+ pshufb m2, m0, [ex_g] |
|
107 |
+ pshufb m3, m0, [ex_b] |
|
108 |
+ movu m0, [rsrcq+o+0] |
|
109 |
+ pshufb m4, m0, [ex_r] |
|
110 |
+ pshufb m5, m0, [ex_g] |
|
111 |
+ pshufb m6, m0, [ex_b] |
|
112 |
+ pmulld m7, [rsp+mmsize*6] |
|
113 |
+ pmulld m2, [rsp+mmsize*7] |
|
114 |
+ pmulld m3, [rsp+mmsize*8] |
|
115 |
+ pmulld m4, [rsp+mmsize*9] |
|
116 |
+ pmulld m5, [rsp+mmsize*10] |
|
117 |
+ pmulld m6, [rsp+mmsize*11] |
|
118 |
+ paddd m7, m2 |
|
119 |
+ paddd m3, m4 |
|
120 |
+ paddd m5, m6 |
|
121 |
+ paddd m7, m3 |
|
122 |
+ paddd m7, m5 |
|
123 |
+ |
|
; Third sum -> m8, using the coefficients held in m10-m15.
124 |
+ movu m0, [lsrcq+o+0] |
|
125 |
+ pshufb m8, m0, [ex_r] |
|
126 |
+ pshufb m2, m0, [ex_g] |
|
127 |
+ pshufb m3, m0, [ex_b] |
|
128 |
+ movu m0, [rsrcq+o+0] |
|
129 |
+ pshufb m4, m0, [ex_r] |
|
130 |
+ pshufb m5, m0, [ex_g] |
|
131 |
+ pshufb m6, m0, [ex_b] |
|
132 |
+ pmulld m8, m10 |
|
133 |
+ pmulld m2, m11 |
|
134 |
+ pmulld m3, m12 |
|
135 |
+ pmulld m4, m13 |
|
136 |
+ pmulld m5, m14 |
|
137 |
+ pmulld m6, m15 |
|
138 |
+ paddd m8, m2 |
|
139 |
+ paddd m3, m4 |
|
140 |
+ paddd m5, m6 |
|
141 |
+ paddd m8, m3 |
|
142 |
+ paddd m8, m5 |
|
143 |
+ |
|
; Shift each sum right by 16 bits.
144 |
+ psrld m1, 16 |
|
145 |
+ psrld m7, 16 |
|
146 |
+ psrld m8, 16 |
|
147 |
+ |
|
; Narrow dwords -> words -> bytes with saturation; the low 12 bytes of m1
; then hold the three sums as separate 4-byte groups, which shuf
; interleaves into 3-byte pixels.
148 |
+ packusdw m1, m7 |
|
149 |
+ packusdw m8, m8 |
|
150 |
+ packuswb m1, m8 |
|
151 |
+ pshufb m1, [shuf] |
|
152 |
+ |
|
; Store 12 output bytes (8 + 4), then advance by 12 bytes / 4 pixels.
153 |
+ movq [dstq+o+0], m1 |
|
154 |
+ psrldq m1, 8 |
|
155 |
+ movd [dstq+o+8], m1 |
|
156 |
+ add r12d, 12 |
|
157 |
+ sub r11d, 4 |
|
158 |
+ jg .loop |
|
159 |
+ |
|
; Move all three row pointers to the next row and repeat while rows remain.
160 |
+ add dstq, dst_linesizeq |
|
161 |
+ add lsrcq, l_linesizeq |
|
162 |
+ add rsrcq, r_linesizeq |
|
163 |
+ sub heightd, 1 |
|
164 |
+ jg .nextrow |
|
165 |
+REP_RET |
|
166 |
+%endif |
0 | 167 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,37 @@ |
0 |
+/* |
|
1 |
+ * Copyright (c) 2015 Paul B Mahol |
|
2 |
+ * |
|
3 |
+ * This file is part of FFmpeg. |
|
4 |
+ * |
|
5 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
7 |
+ * License as published by the Free Software Foundation; either |
|
8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
9 |
+ * |
|
10 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
13 |
+ * Lesser General Public License for more details. |
|
14 |
+ * |
|
15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
16 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
18 |
+ */ |
|
19 |
+ |
|
20 |
+#include "libavutil/x86/cpu.h" |
|
21 |
+ |
|
22 |
+#include "libavfilter/stereo3d.h" |
|
23 |
+ |
|
24 |
+void ff_anaglyph_sse4(uint8_t *dst, uint8_t *lsrc, uint8_t *rsrc, |
|
25 |
+ ptrdiff_t dst_linesize, ptrdiff_t l_linesize, ptrdiff_t r_linesize, |
|
26 |
+ int width, int height, |
|
27 |
+ const int *ana_matrix_r, const int *ana_matrix_g, const int *ana_matrix_b); |
|
28 |
+ |
|
29 |
+void ff_stereo3d_init_x86(Stereo3DDSPContext *dsp) |
|
30 |
+{ |
|
31 |
+ int cpu_flags = av_get_cpu_flags(); |
|
32 |
+ |
|
33 |
+ if (ARCH_X86_64 && EXTERNAL_SSE4(cpu_flags)) { |
|
34 |
+ dsp->anaglyph = ff_anaglyph_sse4; |
|
35 |
+ } |
|
36 |
+} |