Browse code

lavfi/deshake_opencl: optimize transform filter

Reviewed-by: Wei Gao <highgod0401@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>

Lenny Wang authored on 2013/12/14 20:11:00
Showing 4 changed files
... ...
@@ -1,5 +1,6 @@
1 1
 /*
2 2
  * Copyright (C) 2013 Wei Gao <weigao@multicorewareinc.com>
3
+ * Copyright (C) 2013 Lenny Wang
3 4
  *
4 5
  * This file is part of FFmpeg.
5 6
  *
... ...
@@ -57,12 +58,8 @@ typedef struct {
57 57
 typedef struct {
58 58
     cl_command_queue command_queue;
59 59
     cl_program program;
60
-    cl_kernel kernel;
61
-    size_t matrix_size;
62
-    float matrix_y[9];
63
-    float matrix_uv[9];
64
-    cl_mem cl_matrix_y;
65
-    cl_mem cl_matrix_uv;
60
+    cl_kernel kernel_luma;
61
+    cl_kernel kernel_chroma;
66 62
     int in_plane_size[8];
67 63
     int out_plane_size[8];
68 64
     int plane_num;
... ...
@@ -1,5 +1,6 @@
1 1
 /*
2 2
  * Copyright (C) 2013 Wei Gao <weigao@multicorewareinc.com>
3
+ * Copyright (C) 2013 Lenny Wang
3 4
  *
4 5
  * This file is part of FFmpeg.
5 6
  *
... ...
@@ -29,8 +30,8 @@
29 29
 #include "deshake_opencl.h"
30 30
 #include "libavutil/opencl_internal.h"
31 31
 
32
-#define MATRIX_SIZE 6
33 32
 #define PLANE_NUM 3
33
+#define ROUND_TO_16(a) ((((a - 1)/16)+1)*16)
34 34
 
35 35
 int ff_opencl_transform(AVFilterContext *ctx,
36 36
                         int width, int height, int cw, int ch,
... ...
@@ -39,29 +40,40 @@ int ff_opencl_transform(AVFilterContext *ctx,
39 39
                         enum FillMethod fill, AVFrame *in, AVFrame *out)
40 40
 {
41 41
     int ret = 0;
42
-    const size_t global_work_size = width * height + 2 * ch * cw;
43 42
     cl_int status;
44 43
     DeshakeContext *deshake = ctx->priv;
45
-    FFOpenclParam opencl_param = {0};
46
-
47
-    opencl_param.ctx = ctx;
48
-    opencl_param.kernel = deshake->opencl_ctx.kernel;
49
-    ret = av_opencl_buffer_write(deshake->opencl_ctx.cl_matrix_y, (uint8_t *)matrix_y, deshake->opencl_ctx.matrix_size * sizeof(cl_float));
50
-    if (ret < 0)
51
-        return ret;
52
-    ret = av_opencl_buffer_write(deshake->opencl_ctx.cl_matrix_uv, (uint8_t *)matrix_uv, deshake->opencl_ctx.matrix_size * sizeof(cl_float));
53
-    if (ret < 0)
54
-        return ret;
44
+    float4 packed_matrix_lu = {matrix_y[0], matrix_y[1], matrix_y[2], matrix_y[5]};
45
+    float4 packed_matrix_ch = {matrix_uv[0], matrix_uv[1], matrix_uv[2], matrix_uv[5]};
46
+    size_t global_worksize_lu[2] = {(size_t)ROUND_TO_16(width), (size_t)ROUND_TO_16(height)};
47
+    size_t global_worksize_ch[2] = {(size_t)ROUND_TO_16(cw), (size_t)(2*ROUND_TO_16(ch))};
48
+    size_t local_worksize[2] = {16, 16};
49
+    FFOpenclParam param_lu = {0};
50
+    FFOpenclParam param_ch = {0};
51
+    param_lu.ctx = param_ch.ctx = ctx;
52
+    param_lu.kernel = deshake->opencl_ctx.kernel_luma;
53
+    param_ch.kernel = deshake->opencl_ctx.kernel_chroma;
55 54
 
56 55
     if ((unsigned int)interpolate > INTERPOLATE_BIQUADRATIC) {
57 56
         av_log(ctx, AV_LOG_ERROR, "Selected interpolate method is invalid\n");
58 57
         return AVERROR(EINVAL);
59 58
     }
60
-    ret = ff_opencl_set_parameter(&opencl_param,
59
+    ret = ff_opencl_set_parameter(&param_lu,
60
+                                  FF_OPENCL_PARAM_INFO(deshake->opencl_ctx.cl_inbuf),
61
+                                  FF_OPENCL_PARAM_INFO(deshake->opencl_ctx.cl_outbuf),
62
+                                  FF_OPENCL_PARAM_INFO(packed_matrix_lu),
63
+                                  FF_OPENCL_PARAM_INFO(interpolate),
64
+                                  FF_OPENCL_PARAM_INFO(fill),
65
+                                  FF_OPENCL_PARAM_INFO(in->linesize[0]),
66
+                                  FF_OPENCL_PARAM_INFO(out->linesize[0]),
67
+                                  FF_OPENCL_PARAM_INFO(height),
68
+                                  FF_OPENCL_PARAM_INFO(width),
69
+                                  NULL);
70
+    if (ret < 0)
71
+        return ret;
72
+    ret = ff_opencl_set_parameter(&param_ch,
61 73
                                   FF_OPENCL_PARAM_INFO(deshake->opencl_ctx.cl_inbuf),
62 74
                                   FF_OPENCL_PARAM_INFO(deshake->opencl_ctx.cl_outbuf),
63
-                                  FF_OPENCL_PARAM_INFO(deshake->opencl_ctx.cl_matrix_y),
64
-                                  FF_OPENCL_PARAM_INFO(deshake->opencl_ctx.cl_matrix_uv),
75
+                                  FF_OPENCL_PARAM_INFO(packed_matrix_ch),
65 76
                                   FF_OPENCL_PARAM_INFO(interpolate),
66 77
                                   FF_OPENCL_PARAM_INFO(fill),
67 78
                                   FF_OPENCL_PARAM_INFO(in->linesize[0]),
... ...
@@ -76,13 +88,15 @@ int ff_opencl_transform(AVFilterContext *ctx,
76 76
     if (ret < 0)
77 77
         return ret;
78 78
     status = clEnqueueNDRangeKernel(deshake->opencl_ctx.command_queue,
79
-                                    deshake->opencl_ctx.kernel, 1, NULL,
80
-                                    &global_work_size, NULL, 0, NULL, NULL);
79
+                                    deshake->opencl_ctx.kernel_luma, 2, NULL,
80
+                                    global_worksize_lu, local_worksize, 0, NULL, NULL);
81
+    status |= clEnqueueNDRangeKernel(deshake->opencl_ctx.command_queue,
82
+                                    deshake->opencl_ctx.kernel_chroma, 2, NULL,
83
+                                    global_worksize_ch, local_worksize, 0, NULL, NULL);
81 84
     if (status != CL_SUCCESS) {
82 85
         av_log(ctx, AV_LOG_ERROR, "OpenCL run kernel error occurred: %s\n", av_opencl_errstr(status));
83 86
         return AVERROR_EXTERNAL;
84 87
     }
85
-    clFinish(deshake->opencl_ctx.command_queue);
86 88
     ret = av_opencl_buffer_read_image(out->data, deshake->opencl_ctx.out_plane_size,
87 89
                                       deshake->opencl_ctx.plane_num, deshake->opencl_ctx.cl_outbuf,
88 90
                                       deshake->opencl_ctx.cl_outbuf_size);
... ...
@@ -98,16 +112,7 @@ int ff_opencl_deshake_init(AVFilterContext *ctx)
98 98
     ret = av_opencl_init(NULL);
99 99
     if (ret < 0)
100 100
         return ret;
101
-    deshake->opencl_ctx.matrix_size = MATRIX_SIZE;
102
-    deshake->opencl_ctx.plane_num   = PLANE_NUM;
103
-    ret = av_opencl_buffer_create(&deshake->opencl_ctx.cl_matrix_y,
104
-        deshake->opencl_ctx.matrix_size*sizeof(cl_float), CL_MEM_READ_ONLY, NULL);
105
-    if (ret < 0)
106
-        return ret;
107
-    ret = av_opencl_buffer_create(&deshake->opencl_ctx.cl_matrix_uv,
108
-        deshake->opencl_ctx.matrix_size*sizeof(cl_float), CL_MEM_READ_ONLY, NULL);
109
-    if (ret < 0)
110
-        return ret;
101
+    deshake->opencl_ctx.plane_num = PLANE_NUM;
111 102
     deshake->opencl_ctx.command_queue = av_opencl_get_command_queue();
112 103
     if (!deshake->opencl_ctx.command_queue) {
113 104
         av_log(ctx, AV_LOG_ERROR, "Unable to get OpenCL command queue in filter 'deshake'\n");
... ...
@@ -118,10 +123,19 @@ int ff_opencl_deshake_init(AVFilterContext *ctx)
118 118
         av_log(ctx, AV_LOG_ERROR, "OpenCL failed to compile program 'avfilter_transform'\n");
119 119
         return AVERROR(EINVAL);
120 120
     }
121
-    if (!deshake->opencl_ctx.kernel) {
122
-        deshake->opencl_ctx.kernel = clCreateKernel(deshake->opencl_ctx.program, "avfilter_transform", &ret);
121
+    if (!deshake->opencl_ctx.kernel_luma) {
122
+        deshake->opencl_ctx.kernel_luma = clCreateKernel(deshake->opencl_ctx.program,
123
+                                                         "avfilter_transform_luma", &ret);
124
+        if (ret != CL_SUCCESS) {
125
+            av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel 'avfilter_transform_luma'\n");
126
+            return AVERROR(EINVAL);
127
+        }
128
+    }
129
+    if (!deshake->opencl_ctx.kernel_chroma) {
130
+        deshake->opencl_ctx.kernel_chroma = clCreateKernel(deshake->opencl_ctx.program,
131
+                                                           "avfilter_transform_chroma", &ret);
123 132
         if (ret != CL_SUCCESS) {
124
-            av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel 'avfilter_transform'\n");
133
+            av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel 'avfilter_transform_chroma'\n");
125 134
             return AVERROR(EINVAL);
126 135
         }
127 136
     }
... ...
@@ -133,9 +147,8 @@ void ff_opencl_deshake_uninit(AVFilterContext *ctx)
133 133
     DeshakeContext *deshake = ctx->priv;
134 134
     av_opencl_buffer_release(&deshake->opencl_ctx.cl_inbuf);
135 135
     av_opencl_buffer_release(&deshake->opencl_ctx.cl_outbuf);
136
-    av_opencl_buffer_release(&deshake->opencl_ctx.cl_matrix_y);
137
-    av_opencl_buffer_release(&deshake->opencl_ctx.cl_matrix_uv);
138
-    clReleaseKernel(deshake->opencl_ctx.kernel);
136
+    clReleaseKernel(deshake->opencl_ctx.kernel_luma);
137
+    clReleaseKernel(deshake->opencl_ctx.kernel_chroma);
139 138
     clReleaseProgram(deshake->opencl_ctx.program);
140 139
     deshake->opencl_ctx.command_queue = NULL;
141 140
     av_opencl_uninit();
... ...
@@ -23,6 +23,13 @@
23 23
 
24 24
 #include "deshake.h"
25 25
 
26
+typedef struct {
27
+    float x;
28
+    float y;
29
+    float z;
30
+    float w;
31
+} float4;
32
+
26 33
 int ff_opencl_deshake_init(AVFilterContext *ctx);
27 34
 
28 35
 void ff_opencl_deshake_uninit(AVFilterContext *ctx);
... ...
@@ -1,5 +1,6 @@
1 1
 /*
2 2
  * Copyright (C) 2013 Wei Gao <weigao@multicorewareinc.com>
3
+ * Copyright (C) 2013 Lenny Wang
3 4
  *
4 5
  *
5 6
  * This file is part of FFmpeg.
... ...
@@ -25,16 +26,16 @@
25 25
 #include "libavutil/opencl.h"
26 26
 
27 27
 const char *ff_kernel_deshake_opencl = AV_OPENCL_KERNEL(
28
-
29
-inline unsigned char pixel(global const unsigned char *src, float x, float y,
28
+inline unsigned char pixel(global const unsigned char *src, int x, int y,
30 29
                            int w, int h,int stride, unsigned char def)
31 30
 {
32
-    return (x < 0 || y < 0 || x >= w || y >= h) ? def : src[(int)x + (int)y * stride];
31
+    return (x < 0 || y < 0 || x >= w || y >= h) ? def : src[x + y * stride];
33 32
 }
33
+
34 34
 unsigned char interpolate_nearest(float x, float y, global const unsigned char *src,
35 35
                                   int width, int height, int stride, unsigned char def)
36 36
 {
37
-    return pixel(src, (int)(x + 0.5), (int)(y + 0.5), width, height, stride, def);
37
+    return pixel(src, (int)(x + 0.5f), (int)(y + 0.5f), width, height, stride, def);
38 38
 }
39 39
 
40 40
 unsigned char interpolate_bilinear(float x, float y, global const unsigned char *src,
... ...
@@ -42,21 +43,18 @@ unsigned char interpolate_bilinear(float x, float y, global const unsigned char
42 42
 {
43 43
     int x_c, x_f, y_c, y_f;
44 44
     int v1, v2, v3, v4;
45
+    x_f = (int)x;
46
+    y_f = (int)y;
47
+    x_c = x_f + 1;
48
+    y_c = y_f + 1;
45 49
 
46
-    if (x < -1 || x > width || y < -1 || y > height) {
50
+    if (x_f < -1 || x_f > width || y_f < -1 || y_f > height) {
47 51
         return def;
48 52
     } else {
49
-        x_f = (int)x;
50
-        x_c = x_f + 1;
51
-
52
-        y_f = (int)y;
53
-        y_c = y_f + 1;
54
-
55
-        v1 = pixel(src, x_c, y_c, width, height, stride, def);
53
+        v4 = pixel(src, x_f, y_f, width, height, stride, def);
56 54
         v2 = pixel(src, x_c, y_f, width, height, stride, def);
57 55
         v3 = pixel(src, x_f, y_c, width, height, stride, def);
58
-        v4 = pixel(src, x_f, y_f, width, height, stride, def);
59
-
56
+        v1 = pixel(src, x_c, y_c, width, height, stride, def);
60 57
         return (v1*(x - x_f)*(y - y_f) + v2*((x - x_f)*(y_c - y)) +
61 58
                 v3*(x_c - x)*(y - y_f) + v4*((x_c - x)*(y_c - y)));
62 59
     }
... ...
@@ -68,19 +66,18 @@ unsigned char interpolate_biquadratic(float x, float y, global const unsigned ch
68 68
     int     x_c, x_f, y_c, y_f;
69 69
     unsigned char v1,  v2,  v3,  v4;
70 70
     float   f1,  f2,  f3,  f4;
71
+    x_f = (int)x;
72
+    y_f = (int)y;
73
+    x_c = x_f + 1;
74
+    y_c = y_f + 1;
71 75
 
72
-    if (x < - 1 || x > width || y < -1 || y > height)
76
+    if (x_f < - 1 || x_f > width || y_f < -1 || y_f > height)
73 77
         return def;
74 78
     else {
75
-        x_f = (int)x;
76
-        x_c = x_f + 1;
77
-        y_f = (int)y;
78
-        y_c = y_f + 1;
79
-
80
-        v1 = pixel(src, x_c, y_c, width, height, stride, def);
79
+        v4 = pixel(src, x_f, y_f, width, height, stride, def);
81 80
         v2 = pixel(src, x_c, y_f, width, height, stride, def);
82 81
         v3 = pixel(src, x_f, y_c, width, height, stride, def);
83
-        v4 = pixel(src, x_f, y_f, width, height, stride, def);
82
+        v1 = pixel(src, x_c, y_c, width, height, stride, def);
84 83
 
85 84
         f1 = 1 - sqrt((x_c - x) * (y_c - y));
86 85
         f2 = 1 - sqrt((x_c - x) * (y - y_f));
... ...
@@ -107,109 +104,120 @@ inline int mirror(int v, int m)
107 107
     return v;
108 108
 }
109 109
 
110
-kernel void avfilter_transform(global  unsigned char *src,
111
-                               global  unsigned char *dst,
112
-                               global          float *matrix,
113
-                               global          float *matrix2,
114
-                                                 int interpolate,
115
-                                                 int fillmethod,
116
-                                                 int src_stride_lu,
117
-                                                 int dst_stride_lu,
118
-                                                 int src_stride_ch,
119
-                                                 int dst_stride_ch,
120
-                                                 int height,
121
-                                                 int width,
122
-                                                 int ch,
123
-                                                 int cw)
110
+kernel void avfilter_transform_luma(global unsigned char *src,
111
+                                    global unsigned char *dst,
112
+                                    float4 matrix,
113
+                                    int interpolate,
114
+                                    int fill,
115
+                                    int src_stride_lu,
116
+                                    int dst_stride_lu,
117
+                                    int height,
118
+                                    int width)
124 119
 {
125
-     int global_id = get_global_id(0);
126
-
127
-     global unsigned char *dst_y = dst;
128
-     global unsigned char *dst_u = dst_y + height * dst_stride_lu;
129
-     global unsigned char *dst_v = dst_u + ch * dst_stride_ch;
130
-
131
-     global unsigned char *src_y = src;
132
-     global unsigned char *src_u = src_y + height * src_stride_lu;
133
-     global unsigned char *src_v = src_u + ch * src_stride_ch;
134
-
135
-     global unsigned char *tempdst;
136
-     global unsigned char *tempsrc;
137
-
138
-     int x;
139
-     int y;
140
-     float x_s;
141
-     float y_s;
142
-     int tempsrc_stride;
143
-     int tempdst_stride;
144
-     int temp_height;
145
-     int temp_width;
146
-     int curpos;
147
-     unsigned char def = 0;
148
-     if (global_id < width*height) {
149
-        y = global_id/width;
150
-        x = global_id%width;
151
-        x_s = x * matrix[0] + y * matrix[1] + matrix[2];
152
-        y_s = x * matrix[3] + y * matrix[4] + matrix[5];
153
-        tempdst = dst_y;
154
-        tempsrc = src_y;
155
-        tempsrc_stride = src_stride_lu;
156
-        tempdst_stride = dst_stride_lu;
157
-        temp_height = height;
158
-        temp_width = width;
159
-     } else if ((global_id >= width*height)&&(global_id < width*height + ch*cw)) {
160
-        y = (global_id - width*height)/cw;
161
-        x = (global_id - width*height)%cw;
162
-        x_s = x * matrix2[0] + y * matrix2[1] + matrix2[2];
163
-        y_s = x * matrix2[3] + y * matrix2[4] + matrix2[5];
164
-        tempdst = dst_u;
165
-        tempsrc = src_u;
166
-        tempsrc_stride = src_stride_ch;
167
-        tempdst_stride = dst_stride_ch;
168
-        temp_height = ch;
169
-        temp_width = cw;
170
-     } else {
171
-        y = (global_id - width*height - ch*cw)/cw;
172
-        x = (global_id - width*height - ch*cw)%cw;
173
-        x_s = x * matrix2[0] + y * matrix2[1] + matrix2[2];
174
-        y_s = x * matrix2[3] + y * matrix2[4] + matrix2[5];
175
-        tempdst = dst_v;
176
-        tempsrc = src_v;
177
-        tempsrc_stride = src_stride_ch;
178
-        tempdst_stride = dst_stride_ch;
179
-        temp_height = ch;
180
-        temp_width = cw;
181
-     }
182
-     curpos = y * tempdst_stride + x;
183
-     switch (fillmethod) {
184
-        case 0: //FILL_BLANK
185
-            def = 0;
186
-            break;
187
-        case 1: //FILL_ORIGINAL
188
-            def = tempsrc[y*tempsrc_stride+x];
189
-            break;
190
-        case 2: //FILL_CLAMP
191
-            y_s = clipf(y_s, 0, temp_height - 1);
192
-            x_s = clipf(x_s, 0, temp_width - 1);
193
-            def = tempsrc[(int)y_s * tempsrc_stride + (int)x_s];
194
-            break;
195
-        case 3: //FILL_MIRROR
196
-            y_s = mirror(y_s,temp_height - 1);
197
-            x_s = mirror(x_s,temp_width - 1);
198
-            def = tempsrc[(int)y_s * tempsrc_stride + (int)x_s];
199
-            break;
120
+    int x = get_global_id(0);
121
+    int y = get_global_id(1);
122
+    int idx_dst = y * dst_stride_lu + x;
123
+    unsigned char def = 0;
124
+    float x_s = x * matrix.x + y * matrix.y + matrix.z;
125
+    float y_s = x * (-matrix.y) + y * matrix.x + matrix.w;
126
+
127
+    if (x < width && y < height) {
128
+        switch (fill) {
129
+            case 0: //FILL_BLANK
130
+                def = 0;
131
+                break;
132
+            case 1: //FILL_ORIGINAL
133
+                def = src[y*src_stride_lu + x];
134
+                break;
135
+            case 2: //FILL_CLAMP
136
+                y_s = clipf(y_s, 0, height - 1);
137
+                x_s = clipf(x_s, 0, width - 1);
138
+                def = src[(int)y_s * src_stride_lu + (int)x_s];
139
+                break;
140
+            case 3: //FILL_MIRROR
141
+                y_s = mirror(y_s, height - 1);
142
+                x_s = mirror(x_s, width - 1);
143
+                def = src[(int)y_s * src_stride_lu + (int)x_s];
144
+                break;
145
+        }
146
+        switch (interpolate) {
147
+            case 0: //INTERPOLATE_NEAREST
148
+                dst[idx_dst] = interpolate_nearest(x_s, y_s, src, width, height, src_stride_lu, def);
149
+                break;
150
+            case 1: //INTERPOLATE_BILINEAR
151
+                dst[idx_dst] = interpolate_bilinear(x_s, y_s, src, width, height, src_stride_lu, def);
152
+                break;
153
+            case 2: //INTERPOLATE_BIQUADRATIC
154
+                dst[idx_dst] = interpolate_biquadratic(x_s, y_s, src, width, height, src_stride_lu, def);
155
+                break;
156
+            default:
157
+                return;
158
+        }
200 159
     }
201
-    switch (interpolate) {
202
-        case 0: //INTERPOLATE_NEAREST
203
-            tempdst[curpos] = interpolate_nearest(x_s, y_s, tempsrc, temp_width, temp_height, tempsrc_stride, def);
204
-            break;
205
-        case 1: //INTERPOLATE_BILINEAR
206
-            tempdst[curpos] = interpolate_bilinear(x_s, y_s, tempsrc, temp_width, temp_height, tempsrc_stride, def);
207
-            break;
208
-        case 2: //INTERPOLATE_BIQUADRATIC
209
-            tempdst[curpos] = interpolate_biquadratic(x_s, y_s, tempsrc, temp_width, temp_height, tempsrc_stride, def);
210
-            break;
211
-        default:
212
-            return;
160
+}
161
+
162
+kernel void avfilter_transform_chroma(global unsigned char *src,
163
+                                      global unsigned char *dst,
164
+                                      float4 matrix,
165
+                                      int interpolate,
166
+                                      int fill,
167
+                                      int src_stride_lu,
168
+                                      int dst_stride_lu,
169
+                                      int src_stride_ch,
170
+                                      int dst_stride_ch,
171
+                                      int height,
172
+                                      int width,
173
+                                      int ch,
174
+                                      int cw)
175
+{
176
+
177
+    int x = get_global_id(0);
178
+    int y = get_global_id(1);
179
+    int pad_ch = get_global_size(1)>>1;
180
+    global unsigned char *dst_u = dst + height * dst_stride_lu;
181
+    global unsigned char *src_u = src + height * src_stride_lu;
182
+    global unsigned char *dst_v = dst_u + ch * dst_stride_ch;
183
+    global unsigned char *src_v = src_u + ch * src_stride_ch;
184
+    src = y < pad_ch ? src_u : src_v;
185
+    dst = y < pad_ch ? dst_u : dst_v;
186
+    y = select(y - pad_ch, y, y < pad_ch);
187
+    float x_s = x * matrix.x + y * matrix.y + matrix.z;
188
+    float y_s = x * (-matrix.y) + y * matrix.x + matrix.w;
189
+    int idx_dst = y * dst_stride_ch + x;
190
+    unsigned char def;
191
+
192
+    if (x < cw && y < ch) {
193
+        switch (fill) {
194
+            case 0: //FILL_BLANK
195
+                def = 0;
196
+                break;
197
+            case 1: //FILL_ORIGINAL
198
+                def = src[y*src_stride_ch + x];
199
+                break;
200
+            case 2: //FILL_CLAMP
201
+                y_s = clipf(y_s, 0, ch - 1);
202
+                x_s = clipf(x_s, 0, cw - 1);
203
+                def = src[(int)y_s * src_stride_ch + (int)x_s];
204
+                break;
205
+            case 3: //FILL_MIRROR
206
+                y_s = mirror(y_s, ch - 1);
207
+                x_s = mirror(x_s, cw - 1);
208
+                def = src[(int)y_s * src_stride_ch + (int)x_s];
209
+                break;
210
+        }
211
+        switch (interpolate) {
212
+            case 0: //INTERPOLATE_NEAREST
213
+                dst[idx_dst] = interpolate_nearest(x_s, y_s, src, cw, ch, src_stride_ch, def);
214
+                break;
215
+            case 1: //INTERPOLATE_BILINEAR
216
+                dst[idx_dst] = interpolate_bilinear(x_s, y_s, src, cw, ch, src_stride_ch, def);
217
+                break;
218
+            case 2: //INTERPOLATE_BIQUADRATIC
219
+                dst[idx_dst] = interpolate_biquadratic(x_s, y_s, src, cw, ch, src_stride_ch, def);
220
+                break;
221
+            default:
222
+                return;
223
+        }
213 224
     }
214 225
 }
215 226
 );