Reviewed-by: Wei Gao <highgod0401@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
... | ... |
@@ -1,5 +1,6 @@ |
1 | 1 |
/* |
2 | 2 |
* Copyright (C) 2013 Wei Gao <weigao@multicorewareinc.com> |
3 |
+ * Copyright (C) 2013 Lenny Wang |
|
3 | 4 |
* |
4 | 5 |
* This file is part of FFmpeg. |
5 | 6 |
* |
... | ... |
@@ -57,12 +58,8 @@ typedef struct { |
57 | 57 |
typedef struct { |
58 | 58 |
cl_command_queue command_queue; |
59 | 59 |
cl_program program; |
60 |
- cl_kernel kernel; |
|
61 |
- size_t matrix_size; |
|
62 |
- float matrix_y[9]; |
|
63 |
- float matrix_uv[9]; |
|
64 |
- cl_mem cl_matrix_y; |
|
65 |
- cl_mem cl_matrix_uv; |
|
60 |
+ cl_kernel kernel_luma; |
|
61 |
+ cl_kernel kernel_chroma; |
|
66 | 62 |
int in_plane_size[8]; |
67 | 63 |
int out_plane_size[8]; |
68 | 64 |
int plane_num; |
... | ... |
@@ -1,5 +1,6 @@ |
1 | 1 |
/* |
2 | 2 |
* Copyright (C) 2013 Wei Gao <weigao@multicorewareinc.com> |
3 |
+ * Copyright (C) 2013 Lenny Wang |
|
3 | 4 |
* |
4 | 5 |
* This file is part of FFmpeg. |
5 | 6 |
* |
... | ... |
@@ -29,8 +30,8 @@ |
29 | 29 |
#include "deshake_opencl.h" |
30 | 30 |
#include "libavutil/opencl_internal.h" |
31 | 31 |
|
32 |
-#define MATRIX_SIZE 6 |
|
33 | 32 |
#define PLANE_NUM 3 |
33 |
+#define ROUND_TO_16(a) ((((a - 1)/16)+1)*16) |
|
34 | 34 |
|
35 | 35 |
int ff_opencl_transform(AVFilterContext *ctx, |
36 | 36 |
int width, int height, int cw, int ch, |
... | ... |
@@ -39,29 +40,40 @@ int ff_opencl_transform(AVFilterContext *ctx, |
39 | 39 |
enum FillMethod fill, AVFrame *in, AVFrame *out) |
40 | 40 |
{ |
41 | 41 |
int ret = 0; |
42 |
- const size_t global_work_size = width * height + 2 * ch * cw; |
|
43 | 42 |
cl_int status; |
44 | 43 |
DeshakeContext *deshake = ctx->priv; |
45 |
- FFOpenclParam opencl_param = {0}; |
|
46 |
- |
|
47 |
- opencl_param.ctx = ctx; |
|
48 |
- opencl_param.kernel = deshake->opencl_ctx.kernel; |
|
49 |
- ret = av_opencl_buffer_write(deshake->opencl_ctx.cl_matrix_y, (uint8_t *)matrix_y, deshake->opencl_ctx.matrix_size * sizeof(cl_float)); |
|
50 |
- if (ret < 0) |
|
51 |
- return ret; |
|
52 |
- ret = av_opencl_buffer_write(deshake->opencl_ctx.cl_matrix_uv, (uint8_t *)matrix_uv, deshake->opencl_ctx.matrix_size * sizeof(cl_float)); |
|
53 |
- if (ret < 0) |
|
54 |
- return ret; |
|
44 |
+ float4 packed_matrix_lu = {matrix_y[0], matrix_y[1], matrix_y[2], matrix_y[5]}; |
|
45 |
+ float4 packed_matrix_ch = {matrix_uv[0], matrix_uv[1], matrix_uv[2], matrix_uv[5]}; |
|
46 |
+ size_t global_worksize_lu[2] = {(size_t)ROUND_TO_16(width), (size_t)ROUND_TO_16(height)}; |
|
47 |
+ size_t global_worksize_ch[2] = {(size_t)ROUND_TO_16(cw), (size_t)(2*ROUND_TO_16(ch))}; |
|
48 |
+ size_t local_worksize[2] = {16, 16}; |
|
49 |
+ FFOpenclParam param_lu = {0}; |
|
50 |
+ FFOpenclParam param_ch = {0}; |
|
51 |
+ param_lu.ctx = param_ch.ctx = ctx; |
|
52 |
+ param_lu.kernel = deshake->opencl_ctx.kernel_luma; |
|
53 |
+ param_ch.kernel = deshake->opencl_ctx.kernel_chroma; |
|
55 | 54 |
|
56 | 55 |
if ((unsigned int)interpolate > INTERPOLATE_BIQUADRATIC) { |
57 | 56 |
av_log(ctx, AV_LOG_ERROR, "Selected interpolate method is invalid\n"); |
58 | 57 |
return AVERROR(EINVAL); |
59 | 58 |
} |
60 |
- ret = ff_opencl_set_parameter(&opencl_param, |
|
59 |
+ ret = ff_opencl_set_parameter(&param_lu, |
|
60 |
+ FF_OPENCL_PARAM_INFO(deshake->opencl_ctx.cl_inbuf), |
|
61 |
+ FF_OPENCL_PARAM_INFO(deshake->opencl_ctx.cl_outbuf), |
|
62 |
+ FF_OPENCL_PARAM_INFO(packed_matrix_lu), |
|
63 |
+ FF_OPENCL_PARAM_INFO(interpolate), |
|
64 |
+ FF_OPENCL_PARAM_INFO(fill), |
|
65 |
+ FF_OPENCL_PARAM_INFO(in->linesize[0]), |
|
66 |
+ FF_OPENCL_PARAM_INFO(out->linesize[0]), |
|
67 |
+ FF_OPENCL_PARAM_INFO(height), |
|
68 |
+ FF_OPENCL_PARAM_INFO(width), |
|
69 |
+ NULL); |
|
70 |
+ if (ret < 0) |
|
71 |
+ return ret; |
|
72 |
+ ret = ff_opencl_set_parameter(&param_ch, |
|
61 | 73 |
FF_OPENCL_PARAM_INFO(deshake->opencl_ctx.cl_inbuf), |
62 | 74 |
FF_OPENCL_PARAM_INFO(deshake->opencl_ctx.cl_outbuf), |
63 |
- FF_OPENCL_PARAM_INFO(deshake->opencl_ctx.cl_matrix_y), |
|
64 |
- FF_OPENCL_PARAM_INFO(deshake->opencl_ctx.cl_matrix_uv), |
|
75 |
+ FF_OPENCL_PARAM_INFO(packed_matrix_ch), |
|
65 | 76 |
FF_OPENCL_PARAM_INFO(interpolate), |
66 | 77 |
FF_OPENCL_PARAM_INFO(fill), |
67 | 78 |
FF_OPENCL_PARAM_INFO(in->linesize[0]), |
... | ... |
@@ -76,13 +88,15 @@ int ff_opencl_transform(AVFilterContext *ctx, |
76 | 76 |
if (ret < 0) |
77 | 77 |
return ret; |
78 | 78 |
status = clEnqueueNDRangeKernel(deshake->opencl_ctx.command_queue, |
79 |
- deshake->opencl_ctx.kernel, 1, NULL, |
|
80 |
- &global_work_size, NULL, 0, NULL, NULL); |
|
79 |
+ deshake->opencl_ctx.kernel_luma, 2, NULL, |
|
80 |
+ global_worksize_lu, local_worksize, 0, NULL, NULL); |
|
81 |
+ status |= clEnqueueNDRangeKernel(deshake->opencl_ctx.command_queue, |
|
82 |
+ deshake->opencl_ctx.kernel_chroma, 2, NULL, |
|
83 |
+ global_worksize_ch, local_worksize, 0, NULL, NULL); |
|
81 | 84 |
if (status != CL_SUCCESS) { |
82 | 85 |
av_log(ctx, AV_LOG_ERROR, "OpenCL run kernel error occurred: %s\n", av_opencl_errstr(status)); |
83 | 86 |
return AVERROR_EXTERNAL; |
84 | 87 |
} |
85 |
- clFinish(deshake->opencl_ctx.command_queue); |
|
86 | 88 |
ret = av_opencl_buffer_read_image(out->data, deshake->opencl_ctx.out_plane_size, |
87 | 89 |
deshake->opencl_ctx.plane_num, deshake->opencl_ctx.cl_outbuf, |
88 | 90 |
deshake->opencl_ctx.cl_outbuf_size); |
... | ... |
@@ -98,16 +112,7 @@ int ff_opencl_deshake_init(AVFilterContext *ctx) |
98 | 98 |
ret = av_opencl_init(NULL); |
99 | 99 |
if (ret < 0) |
100 | 100 |
return ret; |
101 |
- deshake->opencl_ctx.matrix_size = MATRIX_SIZE; |
|
102 |
- deshake->opencl_ctx.plane_num = PLANE_NUM; |
|
103 |
- ret = av_opencl_buffer_create(&deshake->opencl_ctx.cl_matrix_y, |
|
104 |
- deshake->opencl_ctx.matrix_size*sizeof(cl_float), CL_MEM_READ_ONLY, NULL); |
|
105 |
- if (ret < 0) |
|
106 |
- return ret; |
|
107 |
- ret = av_opencl_buffer_create(&deshake->opencl_ctx.cl_matrix_uv, |
|
108 |
- deshake->opencl_ctx.matrix_size*sizeof(cl_float), CL_MEM_READ_ONLY, NULL); |
|
109 |
- if (ret < 0) |
|
110 |
- return ret; |
|
101 |
+ deshake->opencl_ctx.plane_num = PLANE_NUM; |
|
111 | 102 |
deshake->opencl_ctx.command_queue = av_opencl_get_command_queue(); |
112 | 103 |
if (!deshake->opencl_ctx.command_queue) { |
113 | 104 |
av_log(ctx, AV_LOG_ERROR, "Unable to get OpenCL command queue in filter 'deshake'\n"); |
... | ... |
@@ -118,10 +123,19 @@ int ff_opencl_deshake_init(AVFilterContext *ctx) |
118 | 118 |
av_log(ctx, AV_LOG_ERROR, "OpenCL failed to compile program 'avfilter_transform'\n"); |
119 | 119 |
return AVERROR(EINVAL); |
120 | 120 |
} |
121 |
- if (!deshake->opencl_ctx.kernel) { |
|
122 |
- deshake->opencl_ctx.kernel = clCreateKernel(deshake->opencl_ctx.program, "avfilter_transform", &ret); |
|
121 |
+ if (!deshake->opencl_ctx.kernel_luma) { |
|
122 |
+ deshake->opencl_ctx.kernel_luma = clCreateKernel(deshake->opencl_ctx.program, |
|
123 |
+ "avfilter_transform_luma", &ret); |
|
124 |
+ if (ret != CL_SUCCESS) { |
|
125 |
+ av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel 'avfilter_transform_luma'\n"); |
|
126 |
+ return AVERROR(EINVAL); |
|
127 |
+ } |
|
128 |
+ } |
|
129 |
+ if (!deshake->opencl_ctx.kernel_chroma) { |
|
130 |
+ deshake->opencl_ctx.kernel_chroma = clCreateKernel(deshake->opencl_ctx.program, |
|
131 |
+ "avfilter_transform_chroma", &ret); |
|
123 | 132 |
if (ret != CL_SUCCESS) { |
124 |
- av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel 'avfilter_transform'\n"); |
|
133 |
+ av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel 'avfilter_transform_chroma'\n"); |
|
125 | 134 |
return AVERROR(EINVAL); |
126 | 135 |
} |
127 | 136 |
} |
... | ... |
@@ -133,9 +147,8 @@ void ff_opencl_deshake_uninit(AVFilterContext *ctx) |
133 | 133 |
DeshakeContext *deshake = ctx->priv; |
134 | 134 |
av_opencl_buffer_release(&deshake->opencl_ctx.cl_inbuf); |
135 | 135 |
av_opencl_buffer_release(&deshake->opencl_ctx.cl_outbuf); |
136 |
- av_opencl_buffer_release(&deshake->opencl_ctx.cl_matrix_y); |
|
137 |
- av_opencl_buffer_release(&deshake->opencl_ctx.cl_matrix_uv); |
|
138 |
- clReleaseKernel(deshake->opencl_ctx.kernel); |
|
136 |
+ clReleaseKernel(deshake->opencl_ctx.kernel_luma); |
|
137 |
+ clReleaseKernel(deshake->opencl_ctx.kernel_chroma); |
|
139 | 138 |
clReleaseProgram(deshake->opencl_ctx.program); |
140 | 139 |
deshake->opencl_ctx.command_queue = NULL; |
141 | 140 |
av_opencl_uninit(); |
... | ... |
@@ -1,5 +1,6 @@ |
1 | 1 |
/* |
2 | 2 |
* Copyright (C) 2013 Wei Gao <weigao@multicorewareinc.com> |
3 |
+ * Copyright (C) 2013 Lenny Wang |
|
3 | 4 |
* |
4 | 5 |
* |
5 | 6 |
* This file is part of FFmpeg. |
... | ... |
@@ -25,16 +26,16 @@ |
25 | 25 |
#include "libavutil/opencl.h" |
26 | 26 |
|
27 | 27 |
const char *ff_kernel_deshake_opencl = AV_OPENCL_KERNEL( |
28 |
- |
|
29 |
-inline unsigned char pixel(global const unsigned char *src, float x, float y, |
|
28 |
+inline unsigned char pixel(global const unsigned char *src, int x, int y, |
|
30 | 29 |
int w, int h,int stride, unsigned char def) |
31 | 30 |
{ |
32 |
- return (x < 0 || y < 0 || x >= w || y >= h) ? def : src[(int)x + (int)y * stride]; |
|
31 |
+ return (x < 0 || y < 0 || x >= w || y >= h) ? def : src[x + y * stride]; |
|
33 | 32 |
} |
33 |
+ |
|
34 | 34 |
unsigned char interpolate_nearest(float x, float y, global const unsigned char *src, |
35 | 35 |
int width, int height, int stride, unsigned char def) |
36 | 36 |
{ |
37 |
- return pixel(src, (int)(x + 0.5), (int)(y + 0.5), width, height, stride, def); |
|
37 |
+ return pixel(src, (int)(x + 0.5f), (int)(y + 0.5f), width, height, stride, def); |
|
38 | 38 |
} |
39 | 39 |
|
40 | 40 |
unsigned char interpolate_bilinear(float x, float y, global const unsigned char *src, |
... | ... |
@@ -42,21 +43,18 @@ unsigned char interpolate_bilinear(float x, float y, global const unsigned char |
42 | 42 |
{ |
43 | 43 |
int x_c, x_f, y_c, y_f; |
44 | 44 |
int v1, v2, v3, v4; |
45 |
+ x_f = (int)x; |
|
46 |
+ y_f = (int)y; |
|
47 |
+ x_c = x_f + 1; |
|
48 |
+ y_c = y_f + 1; |
|
45 | 49 |
|
46 |
- if (x < -1 || x > width || y < -1 || y > height) { |
|
50 |
+ if (x_f < -1 || x_f > width || y_f < -1 || y_f > height) { |
|
47 | 51 |
return def; |
48 | 52 |
} else { |
49 |
- x_f = (int)x; |
|
50 |
- x_c = x_f + 1; |
|
51 |
- |
|
52 |
- y_f = (int)y; |
|
53 |
- y_c = y_f + 1; |
|
54 |
- |
|
55 |
- v1 = pixel(src, x_c, y_c, width, height, stride, def); |
|
53 |
+ v4 = pixel(src, x_f, y_f, width, height, stride, def); |
|
56 | 54 |
v2 = pixel(src, x_c, y_f, width, height, stride, def); |
57 | 55 |
v3 = pixel(src, x_f, y_c, width, height, stride, def); |
58 |
- v4 = pixel(src, x_f, y_f, width, height, stride, def); |
|
59 |
- |
|
56 |
+ v1 = pixel(src, x_c, y_c, width, height, stride, def); |
|
60 | 57 |
return (v1*(x - x_f)*(y - y_f) + v2*((x - x_f)*(y_c - y)) + |
61 | 58 |
v3*(x_c - x)*(y - y_f) + v4*((x_c - x)*(y_c - y))); |
62 | 59 |
} |
... | ... |
@@ -68,19 +66,18 @@ unsigned char interpolate_biquadratic(float x, float y, global const unsigned ch |
68 | 68 |
int x_c, x_f, y_c, y_f; |
69 | 69 |
unsigned char v1, v2, v3, v4; |
70 | 70 |
float f1, f2, f3, f4; |
71 |
+ x_f = (int)x; |
|
72 |
+ y_f = (int)y; |
|
73 |
+ x_c = x_f + 1; |
|
74 |
+ y_c = y_f + 1; |
|
71 | 75 |
|
72 |
- if (x < - 1 || x > width || y < -1 || y > height) |
|
76 |
+ if (x_f < - 1 || x_f > width || y_f < -1 || y_f > height) |
|
73 | 77 |
return def; |
74 | 78 |
else { |
75 |
- x_f = (int)x; |
|
76 |
- x_c = x_f + 1; |
|
77 |
- y_f = (int)y; |
|
78 |
- y_c = y_f + 1; |
|
79 |
- |
|
80 |
- v1 = pixel(src, x_c, y_c, width, height, stride, def); |
|
79 |
+ v4 = pixel(src, x_f, y_f, width, height, stride, def); |
|
81 | 80 |
v2 = pixel(src, x_c, y_f, width, height, stride, def); |
82 | 81 |
v3 = pixel(src, x_f, y_c, width, height, stride, def); |
83 |
- v4 = pixel(src, x_f, y_f, width, height, stride, def); |
|
82 |
+ v1 = pixel(src, x_c, y_c, width, height, stride, def); |
|
84 | 83 |
|
85 | 84 |
f1 = 1 - sqrt((x_c - x) * (y_c - y)); |
86 | 85 |
f2 = 1 - sqrt((x_c - x) * (y - y_f)); |
... | ... |
@@ -107,109 +104,120 @@ inline int mirror(int v, int m) |
107 | 107 |
return v; |
108 | 108 |
} |
109 | 109 |
|
110 |
-kernel void avfilter_transform(global unsigned char *src, |
|
111 |
- global unsigned char *dst, |
|
112 |
- global float *matrix, |
|
113 |
- global float *matrix2, |
|
114 |
- int interpolate, |
|
115 |
- int fillmethod, |
|
116 |
- int src_stride_lu, |
|
117 |
- int dst_stride_lu, |
|
118 |
- int src_stride_ch, |
|
119 |
- int dst_stride_ch, |
|
120 |
- int height, |
|
121 |
- int width, |
|
122 |
- int ch, |
|
123 |
- int cw) |
|
110 |
+kernel void avfilter_transform_luma(global unsigned char *src, |
|
111 |
+ global unsigned char *dst, |
|
112 |
+ float4 matrix, |
|
113 |
+ int interpolate, |
|
114 |
+ int fill, |
|
115 |
+ int src_stride_lu, |
|
116 |
+ int dst_stride_lu, |
|
117 |
+ int height, |
|
118 |
+ int width) |
|
124 | 119 |
{ |
125 |
- int global_id = get_global_id(0); |
|
126 |
- |
|
127 |
- global unsigned char *dst_y = dst; |
|
128 |
- global unsigned char *dst_u = dst_y + height * dst_stride_lu; |
|
129 |
- global unsigned char *dst_v = dst_u + ch * dst_stride_ch; |
|
130 |
- |
|
131 |
- global unsigned char *src_y = src; |
|
132 |
- global unsigned char *src_u = src_y + height * src_stride_lu; |
|
133 |
- global unsigned char *src_v = src_u + ch * src_stride_ch; |
|
134 |
- |
|
135 |
- global unsigned char *tempdst; |
|
136 |
- global unsigned char *tempsrc; |
|
137 |
- |
|
138 |
- int x; |
|
139 |
- int y; |
|
140 |
- float x_s; |
|
141 |
- float y_s; |
|
142 |
- int tempsrc_stride; |
|
143 |
- int tempdst_stride; |
|
144 |
- int temp_height; |
|
145 |
- int temp_width; |
|
146 |
- int curpos; |
|
147 |
- unsigned char def = 0; |
|
148 |
- if (global_id < width*height) { |
|
149 |
- y = global_id/width; |
|
150 |
- x = global_id%width; |
|
151 |
- x_s = x * matrix[0] + y * matrix[1] + matrix[2]; |
|
152 |
- y_s = x * matrix[3] + y * matrix[4] + matrix[5]; |
|
153 |
- tempdst = dst_y; |
|
154 |
- tempsrc = src_y; |
|
155 |
- tempsrc_stride = src_stride_lu; |
|
156 |
- tempdst_stride = dst_stride_lu; |
|
157 |
- temp_height = height; |
|
158 |
- temp_width = width; |
|
159 |
- } else if ((global_id >= width*height)&&(global_id < width*height + ch*cw)) { |
|
160 |
- y = (global_id - width*height)/cw; |
|
161 |
- x = (global_id - width*height)%cw; |
|
162 |
- x_s = x * matrix2[0] + y * matrix2[1] + matrix2[2]; |
|
163 |
- y_s = x * matrix2[3] + y * matrix2[4] + matrix2[5]; |
|
164 |
- tempdst = dst_u; |
|
165 |
- tempsrc = src_u; |
|
166 |
- tempsrc_stride = src_stride_ch; |
|
167 |
- tempdst_stride = dst_stride_ch; |
|
168 |
- temp_height = ch; |
|
169 |
- temp_width = cw; |
|
170 |
- } else { |
|
171 |
- y = (global_id - width*height - ch*cw)/cw; |
|
172 |
- x = (global_id - width*height - ch*cw)%cw; |
|
173 |
- x_s = x * matrix2[0] + y * matrix2[1] + matrix2[2]; |
|
174 |
- y_s = x * matrix2[3] + y * matrix2[4] + matrix2[5]; |
|
175 |
- tempdst = dst_v; |
|
176 |
- tempsrc = src_v; |
|
177 |
- tempsrc_stride = src_stride_ch; |
|
178 |
- tempdst_stride = dst_stride_ch; |
|
179 |
- temp_height = ch; |
|
180 |
- temp_width = cw; |
|
181 |
- } |
|
182 |
- curpos = y * tempdst_stride + x; |
|
183 |
- switch (fillmethod) { |
|
184 |
- case 0: //FILL_BLANK |
|
185 |
- def = 0; |
|
186 |
- break; |
|
187 |
- case 1: //FILL_ORIGINAL |
|
188 |
- def = tempsrc[y*tempsrc_stride+x]; |
|
189 |
- break; |
|
190 |
- case 2: //FILL_CLAMP |
|
191 |
- y_s = clipf(y_s, 0, temp_height - 1); |
|
192 |
- x_s = clipf(x_s, 0, temp_width - 1); |
|
193 |
- def = tempsrc[(int)y_s * tempsrc_stride + (int)x_s]; |
|
194 |
- break; |
|
195 |
- case 3: //FILL_MIRROR |
|
196 |
- y_s = mirror(y_s,temp_height - 1); |
|
197 |
- x_s = mirror(x_s,temp_width - 1); |
|
198 |
- def = tempsrc[(int)y_s * tempsrc_stride + (int)x_s]; |
|
199 |
- break; |
|
120 |
+ int x = get_global_id(0); |
|
121 |
+ int y = get_global_id(1); |
|
122 |
+ int idx_dst = y * dst_stride_lu + x; |
|
123 |
+ unsigned char def = 0; |
|
124 |
+ float x_s = x * matrix.x + y * matrix.y + matrix.z; |
|
125 |
+ float y_s = x * (-matrix.y) + y * matrix.x + matrix.w; |
|
126 |
+ |
|
127 |
+ if (x < width && y < height) { |
|
128 |
+ switch (fill) { |
|
129 |
+ case 0: //FILL_BLANK |
|
130 |
+ def = 0; |
|
131 |
+ break; |
|
132 |
+ case 1: //FILL_ORIGINAL |
|
133 |
+ def = src[y*src_stride_lu + x]; |
|
134 |
+ break; |
|
135 |
+ case 2: //FILL_CLAMP |
|
136 |
+ y_s = clipf(y_s, 0, height - 1); |
|
137 |
+ x_s = clipf(x_s, 0, width - 1); |
|
138 |
+ def = src[(int)y_s * src_stride_lu + (int)x_s]; |
|
139 |
+ break; |
|
140 |
+ case 3: //FILL_MIRROR |
|
141 |
+ y_s = mirror(y_s, height - 1); |
|
142 |
+ x_s = mirror(x_s, width - 1); |
|
143 |
+ def = src[(int)y_s * src_stride_lu + (int)x_s]; |
|
144 |
+ break; |
|
145 |
+ } |
|
146 |
+ switch (interpolate) { |
|
147 |
+ case 0: //INTERPOLATE_NEAREST |
|
148 |
+ dst[idx_dst] = interpolate_nearest(x_s, y_s, src, width, height, src_stride_lu, def); |
|
149 |
+ break; |
|
150 |
+ case 1: //INTERPOLATE_BILINEAR |
|
151 |
+ dst[idx_dst] = interpolate_bilinear(x_s, y_s, src, width, height, src_stride_lu, def); |
|
152 |
+ break; |
|
153 |
+ case 2: //INTERPOLATE_BIQUADRATIC |
|
154 |
+ dst[idx_dst] = interpolate_biquadratic(x_s, y_s, src, width, height, src_stride_lu, def); |
|
155 |
+ break; |
|
156 |
+ default: |
|
157 |
+ return; |
|
158 |
+ } |
|
200 | 159 |
} |
201 |
- switch (interpolate) { |
|
202 |
- case 0: //INTERPOLATE_NEAREST |
|
203 |
- tempdst[curpos] = interpolate_nearest(x_s, y_s, tempsrc, temp_width, temp_height, tempsrc_stride, def); |
|
204 |
- break; |
|
205 |
- case 1: //INTERPOLATE_BILINEAR |
|
206 |
- tempdst[curpos] = interpolate_bilinear(x_s, y_s, tempsrc, temp_width, temp_height, tempsrc_stride, def); |
|
207 |
- break; |
|
208 |
- case 2: //INTERPOLATE_BIQUADRATIC |
|
209 |
- tempdst[curpos] = interpolate_biquadratic(x_s, y_s, tempsrc, temp_width, temp_height, tempsrc_stride, def); |
|
210 |
- break; |
|
211 |
- default: |
|
212 |
- return; |
|
160 |
+} |
|
161 |
+ |
|
162 |
+kernel void avfilter_transform_chroma(global unsigned char *src, |
|
163 |
+ global unsigned char *dst, |
|
164 |
+ float4 matrix, |
|
165 |
+ int interpolate, |
|
166 |
+ int fill, |
|
167 |
+ int src_stride_lu, |
|
168 |
+ int dst_stride_lu, |
|
169 |
+ int src_stride_ch, |
|
170 |
+ int dst_stride_ch, |
|
171 |
+ int height, |
|
172 |
+ int width, |
|
173 |
+ int ch, |
|
174 |
+ int cw) |
|
175 |
+{ |
|
176 |
+ |
|
177 |
+ int x = get_global_id(0); |
|
178 |
+ int y = get_global_id(1); |
|
179 |
+ int pad_ch = get_global_size(1)>>1; |
|
180 |
+ global unsigned char *dst_u = dst + height * dst_stride_lu; |
|
181 |
+ global unsigned char *src_u = src + height * src_stride_lu; |
|
182 |
+ global unsigned char *dst_v = dst_u + ch * dst_stride_ch; |
|
183 |
+ global unsigned char *src_v = src_u + ch * src_stride_ch; |
|
184 |
+ src = y < pad_ch ? src_u : src_v; |
|
185 |
+ dst = y < pad_ch ? dst_u : dst_v; |
|
186 |
+ y = select(y - pad_ch, y, y < pad_ch); |
|
187 |
+ float x_s = x * matrix.x + y * matrix.y + matrix.z; |
|
188 |
+ float y_s = x * (-matrix.y) + y * matrix.x + matrix.w; |
|
189 |
+ int idx_dst = y * dst_stride_ch + x; |
|
190 |
+ unsigned char def; |
|
191 |
+ |
|
192 |
+ if (x < cw && y < ch) { |
|
193 |
+ switch (fill) { |
|
194 |
+ case 0: //FILL_BLANK |
|
195 |
+ def = 0; |
|
196 |
+ break; |
|
197 |
+ case 1: //FILL_ORIGINAL |
|
198 |
+ def = src[y*src_stride_ch + x]; |
|
199 |
+ break; |
|
200 |
+ case 2: //FILL_CLAMP |
|
201 |
+ y_s = clipf(y_s, 0, ch - 1); |
|
202 |
+ x_s = clipf(x_s, 0, cw - 1); |
|
203 |
+ def = src[(int)y_s * src_stride_ch + (int)x_s]; |
|
204 |
+ break; |
|
205 |
+ case 3: //FILL_MIRROR |
|
206 |
+ y_s = mirror(y_s, ch - 1); |
|
207 |
+ x_s = mirror(x_s, cw - 1); |
|
208 |
+ def = src[(int)y_s * src_stride_ch + (int)x_s]; |
|
209 |
+ break; |
|
210 |
+ } |
|
211 |
+ switch (interpolate) { |
|
212 |
+ case 0: //INTERPOLATE_NEAREST |
|
213 |
+ dst[idx_dst] = interpolate_nearest(x_s, y_s, src, cw, ch, src_stride_ch, def); |
|
214 |
+ break; |
|
215 |
+ case 1: //INTERPOLATE_BILINEAR |
|
216 |
+ dst[idx_dst] = interpolate_bilinear(x_s, y_s, src, cw, ch, src_stride_ch, def); |
|
217 |
+ break; |
|
218 |
+ case 2: //INTERPOLATE_BIQUADRATIC |
|
219 |
+ dst[idx_dst] = interpolate_biquadratic(x_s, y_s, src, cw, ch, src_stride_ch, def); |
|
220 |
+ break; |
|
221 |
+ default: |
|
222 |
+ return; |
|
223 |
+ } |
|
213 | 224 |
} |
214 | 225 |
} |
215 | 226 |
); |