Browse code

avfilter/xbr: add slice threading

Clément Bœsch authored on 2014/11/16 06:15:07
Showing 1 changed file
... ...
@@ -26,8 +26,6 @@
26 26
  *
27 27
  * @see http://www.libretro.com/forums/viewtopic.php?f=6&t=134
28 28
  * @see https://github.com/yoyofr/iFBA/blob/master/fba_src/src/intf/video/scalers/xbr.cpp
29
- *
30
- * @todo add threading
31 29
  */
32 30
 
33 31
 #include "libavutil/opt.h"
... ...
@@ -40,7 +38,7 @@
40 40
 #define RED_BLUE_MASK 0x00FF00FF
41 41
 #define GREEN_MASK    0x0000FF00
42 42
 
43
-typedef void (*xbrfunc_t)(AVFrame *input, AVFrame *output, const uint32_t *r2y);
43
+typedef int (*xbrfunc_t)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
44 44
 
45 45
 typedef struct {
46 46
     const AVClass *class;
... ...
@@ -49,6 +47,11 @@ typedef struct {
49 49
     uint32_t rgbtoyuv[1<<24];
50 50
 } XBRContext;
51 51
 
52
+typedef struct ThreadData {     /* per-call arguments handed to the slice worker through its void *arg */
53
+    AVFrame *in, *out;          /* input frame and output frame of the current filter_frame() call */
54
+    const uint32_t *rgbtoyuv;   /* set from XBRContext.rgbtoyuv; the workers read it as their r2y table */
55
+} ThreadData;
56
+
52 57
 #define OFFSET(x) offsetof(XBRContext, x)
53 58
 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
54 59
 static const AVOption xbr_options[] = {
... ...
@@ -182,12 +185,18 @@ static uint32_t pixel_diff(uint32_t x, uint32_t y, const uint32_t *r2y)
182 182
     }                                                                                               \
183 183
 } while (0)
184 184
 
185
-static void xbr2x(AVFrame * input, AVFrame * output, const uint32_t * r2y)
185
+static int xbr2x(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
186 186
 {
187 187
     int x, y;
188
+    const ThreadData *td = arg;
189
+    const AVFrame *input = td->in;
190
+    AVFrame *output = td->out;
191
+    const uint32_t *r2y = td->rgbtoyuv;
192
+    const int slice_start = (input->height *  jobnr   ) / nb_jobs;
193
+    const int slice_end   = (input->height * (jobnr+1)) / nb_jobs;
188 194
     const int nl = output->linesize[0] >> 2;
189 195
 
190
-    for (y = 0; y < input->height; y++) {
196
+    for (y = slice_start; y < slice_end; y++) {
191 197
         INIT_SRC_DST_POINTERS(2)
192 198
 
193 199
         for (x = 0; x < input->width; x++) {
... ...
@@ -209,6 +218,7 @@ static void xbr2x(AVFrame * input, AVFrame * output, const uint32_t * r2y)
209 209
             E += 2;
210 210
         }
211 211
     }
212
+    return 0;
212 213
 }
213 214
 
214 215
 #define FILT3(PE, PI, PH, PF, PG, PC, PD, PB, PA, G5, C4, G0, D0, C1, B1, F4, I4, H5, I5, A0, A1,   \
... ...
@@ -251,13 +261,19 @@ static void xbr2x(AVFrame * input, AVFrame * output, const uint32_t * r2y)
251 251
     }                                                                                               \
252 252
 } while (0)
253 253
 
254
-static void xbr3x(AVFrame *input, AVFrame *output, const uint32_t *r2y)
254
+static int xbr3x(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
255 255
 {
256 256
     int x, y;
257
+    const ThreadData *td = arg;
258
+    const AVFrame *input = td->in;
259
+    AVFrame *output = td->out;
260
+    const uint32_t *r2y = td->rgbtoyuv;
261
+    const int slice_start = (input->height *  jobnr   ) / nb_jobs;
262
+    const int slice_end   = (input->height * (jobnr+1)) / nb_jobs;
257 263
     const int nl = output->linesize[0] >> 2;
258 264
     const int nl1 = nl + nl;
259 265
 
260
-    for (y = 0; y < input->height; y++) {
266
+    for (y = slice_start; y < slice_end; y++) {
261 267
         INIT_SRC_DST_POINTERS(3)
262 268
 
263 269
         for (x = 0; x < input->width; x++) {
... ...
@@ -281,6 +297,7 @@ static void xbr3x(AVFrame *input, AVFrame *output, const uint32_t *r2y)
281 281
             E += 3;
282 282
         }
283 283
     }
284
+    return 0;
284 285
 }
285 286
 
286 287
 #define FILT4(PE, PI, PH, PF, PG, PC, PD, PB, PA, G5, C4, G0, D0, C1, B1, F4, I4, H5, I5, A0, A1,   \
... ...
@@ -327,14 +344,20 @@ static void xbr3x(AVFrame *input, AVFrame *output, const uint32_t *r2y)
327 327
     }                                                                                               \
328 328
 } while (0)
329 329
 
330
-static void xbr4x(AVFrame *input, AVFrame *output, const uint32_t *r2y)
330
+static int xbr4x(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
331 331
 {
332 332
     int x, y;
333
+    const ThreadData *td = arg;
334
+    const AVFrame *input = td->in;
335
+    AVFrame *output = td->out;
336
+    const uint32_t *r2y = td->rgbtoyuv;
337
+    const int slice_start = (input->height *  jobnr   ) / nb_jobs;
338
+    const int slice_end   = (input->height * (jobnr+1)) / nb_jobs;
333 339
     const int nl = output->linesize[0] >> 2;
334 340
     const int nl1 = nl + nl;
335 341
     const int nl2 = nl1 + nl;
336 342
 
337
-    for (y = 0; y < input->height; y++) {
343
+    for (y = slice_start; y < slice_end; y++) {
338 344
         INIT_SRC_DST_POINTERS(4)
339 345
 
340 346
         for (x = 0; x < input->width; x++) {
... ...
@@ -359,6 +382,7 @@ static void xbr4x(AVFrame *input, AVFrame *output, const uint32_t *r2y)
359 359
             E += 4;
360 360
         }
361 361
     }
362
+    return 0;
362 363
 }
363 364
 
364 365
 static int config_output(AVFilterLink *outlink)
... ...
@@ -387,7 +411,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
387 387
     AVFilterContext *ctx = inlink->dst;
388 388
     AVFilterLink *outlink = ctx->outputs[0];
389 389
     XBRContext *xbr = ctx->priv;
390
-    const uint32_t *r2y = xbr->rgbtoyuv;
390
+    ThreadData td;
391 391
 
392 392
     AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
393 393
     if (!out) {
... ...
@@ -396,7 +420,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
396 396
     }
397 397
 
398 398
     av_frame_copy_props(out, in);
399
-    xbr->func(in, out, r2y);
399
+
400
+    td.in = in;
401
+    td.out = out;
402
+    td.rgbtoyuv = xbr->rgbtoyuv;
403
+    ctx->internal->execute(ctx, xbr->func, &td, NULL, FFMIN(inlink->h, ctx->graph->nb_threads));
400 404
 
401 405
     out->width  = outlink->w;
402 406
     out->height = outlink->h;
... ...
@@ -459,4 +487,5 @@ AVFilter ff_vf_xbr = {
459 459
     .priv_size     = sizeof(XBRContext),
460 460
     .priv_class    = &xbr_class,
461 461
     .init          = init,
462
+    .flags         = AVFILTER_FLAG_SLICE_THREADS,
462 463
 };