Browse code

avfilter/pthread: use slice threading from avutil

Benchmark (with 2 cpus):
./ffmpeg -f rawvideo -s 1280x720 -t 1000 -i /dev/zero \
-filter_threads $threads -vf transpose=clock -f null null
threads=2:
old: 31.129s 31.446s 31.574s
new: 29.602s 29.636s 29.656s
threads=3 (nb_threads = nb_cpus + 1 is a bad choice in this situation):
old: 40.132s 40.279s 40.279s
new: 39.308s 39.570s 39.693s
threads=4:
old: 31.306s 31.366s 31.654s
new: 30.231s 30.360s 30.451s

Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>

Muhammad Faiz authored on 2017/07/12 09:16:33
Showing 1 changed file
... ...
@@ -27,6 +27,7 @@
27 27
 #include "libavutil/cpu.h"
28 28
 #include "libavutil/mem.h"
29 29
 #include "libavutil/thread.h"
30
+#include "libavutil/slicethread.h"
30 31
 
31 32
 #include "avfilter.h"
32 33
 #include "internal.h"
... ...
@@ -34,85 +35,26 @@
34 34
 
35 35
 typedef struct ThreadContext {
36 36
     AVFilterGraph *graph;
37
-
38
-    int nb_threads;
39
-    pthread_t *workers;
37
+    AVSliceThread *thread;
40 38
     avfilter_action_func *func;
41 39
 
42 40
     /* per-execute parameters */
43 41
     AVFilterContext *ctx;
44 42
     void *arg;
45 43
     int   *rets;
46
-    int nb_jobs;
47
-
48
-    pthread_cond_t last_job_cond;
49
-    pthread_cond_t current_job_cond;
50
-    pthread_mutex_t current_job_lock;
51
-    int current_job;
52
-    unsigned int current_execute;
53
-    int done;
54 44
 } ThreadContext;
55 45
 
56
-static void* attribute_align_arg worker(void *v)
46
+static void worker_func(void *priv, int jobnr, int threadnr, int nb_jobs, int nb_threads)
57 47
 {
58
-    ThreadContext *c = v;
59
-    int our_job      = c->nb_jobs;
60
-    int nb_threads   = c->nb_threads;
61
-    unsigned int last_execute = 0;
62
-    int ret, self_id;
63
-
64
-    pthread_mutex_lock(&c->current_job_lock);
65
-    self_id = c->current_job++;
66
-
67
-    for (;;) {
68
-        while (our_job >= c->nb_jobs) {
69
-            if (c->current_job == nb_threads + c->nb_jobs)
70
-                pthread_cond_signal(&c->last_job_cond);
71
-
72
-            while (last_execute == c->current_execute && !c->done)
73
-                pthread_cond_wait(&c->current_job_cond, &c->current_job_lock);
74
-            last_execute = c->current_execute;
75
-            our_job = self_id;
76
-
77
-            if (c->done) {
78
-                pthread_mutex_unlock(&c->current_job_lock);
79
-                return NULL;
80
-            }
81
-        }
82
-        pthread_mutex_unlock(&c->current_job_lock);
83
-
84
-        ret = c->func(c->ctx, c->arg, our_job, c->nb_jobs);
85
-        if (c->rets)
86
-            c->rets[our_job % c->nb_jobs] = ret;
87
-
88
-        pthread_mutex_lock(&c->current_job_lock);
89
-        our_job = c->current_job++;
90
-    }
48
+    ThreadContext *c = priv;
49
+    int ret = c->func(c->ctx, c->arg, jobnr, nb_jobs);
50
+    if (c->rets)
51
+        c->rets[jobnr] = ret;
91 52
 }
92 53
 
93 54
 static void slice_thread_uninit(ThreadContext *c)
94 55
 {
95
-    int i;
96
-
97
-    pthread_mutex_lock(&c->current_job_lock);
98
-    c->done = 1;
99
-    pthread_cond_broadcast(&c->current_job_cond);
100
-    pthread_mutex_unlock(&c->current_job_lock);
101
-
102
-    for (i = 0; i < c->nb_threads; i++)
103
-         pthread_join(c->workers[i], NULL);
104
-
105
-    pthread_mutex_destroy(&c->current_job_lock);
106
-    pthread_cond_destroy(&c->current_job_cond);
107
-    pthread_cond_destroy(&c->last_job_cond);
108
-    av_freep(&c->workers);
109
-}
110
-
111
-static void slice_thread_park_workers(ThreadContext *c)
112
-{
113
-    while (c->current_job != c->nb_threads + c->nb_jobs)
114
-        pthread_cond_wait(&c->last_job_cond, &c->current_job_lock);
115
-    pthread_mutex_unlock(&c->current_job_lock);
56
+    avpriv_slicethread_free(&c->thread);
116 57
 }
117 58
 
118 59
 static int thread_execute(AVFilterContext *ctx, avfilter_action_func *func,
... ...
@@ -122,67 +64,21 @@ static int thread_execute(AVFilterContext *ctx, avfilter_action_func *func,
122 122
 
123 123
     if (nb_jobs <= 0)
124 124
         return 0;
125
-
126
-    pthread_mutex_lock(&c->current_job_lock);
127
-
128
-    c->current_job = c->nb_threads;
129
-    c->nb_jobs     = nb_jobs;
130 125
     c->ctx         = ctx;
131 126
     c->arg         = arg;
132 127
     c->func        = func;
133 128
     c->rets        = ret;
134
-    c->current_execute++;
135
-
136
-    pthread_cond_broadcast(&c->current_job_cond);
137
-
138
-    slice_thread_park_workers(c);
139 129
 
130
+    avpriv_slicethread_execute(c->thread, nb_jobs, 0);
140 131
     return 0;
141 132
 }
142 133
 
143 134
 static int thread_init_internal(ThreadContext *c, int nb_threads)
144 135
 {
145
-    int i, ret;
146
-
147
-    if (!nb_threads) {
148
-        int nb_cpus = av_cpu_count();
149
-        // use number of cores + 1 as thread count if there is more than one
150
-        if (nb_cpus > 1)
151
-            nb_threads = nb_cpus + 1;
152
-        else
153
-            nb_threads = 1;
154
-    }
155
-
136
+    nb_threads = avpriv_slicethread_create(&c->thread, c, worker_func, NULL, nb_threads);
156 137
     if (nb_threads <= 1)
157
-        return 1;
158
-
159
-    c->nb_threads = nb_threads;
160
-    c->workers = av_mallocz_array(sizeof(*c->workers), nb_threads);
161
-    if (!c->workers)
162
-        return AVERROR(ENOMEM);
163
-
164
-    c->current_job = 0;
165
-    c->nb_jobs     = 0;
166
-    c->done        = 0;
167
-
168
-    pthread_cond_init(&c->current_job_cond, NULL);
169
-    pthread_cond_init(&c->last_job_cond,    NULL);
170
-
171
-    pthread_mutex_init(&c->current_job_lock, NULL);
172
-    pthread_mutex_lock(&c->current_job_lock);
173
-    for (i = 0; i < nb_threads; i++) {
174
-        ret = pthread_create(&c->workers[i], NULL, worker, c);
175
-        if (ret) {
176
-           pthread_mutex_unlock(&c->current_job_lock);
177
-           c->nb_threads = i;
178
-           slice_thread_uninit(c);
179
-           return AVERROR(ret);
180
-        }
181
-    }
182
-
183
-    slice_thread_park_workers(c);
184
-
185
-    return c->nb_threads;
138
+        avpriv_slicethread_free(&c->thread);
139
+    return FFMAX(nb_threads, 1);
186 140
 }
187 141
 
188 142
 int ff_graph_thread_init(AVFilterGraph *graph)