Benchmark (with 2 CPUs):
./ffmpeg -f rawvideo -s 1280x720 -t 1000 -i /dev/zero \
-filter_threads $threads -vf transpose=clock -f null null
threads=2:
old: 31.129s 31.446s 31.574s
new: 29.602s 29.636s 29.656s
threads=3 (nb_threads = nb_cpus + 1 is a bad choice in this situation):
old: 40.132s 40.279s 40.279s
new: 39.308s 39.570s 39.693s
threads=4:
old: 31.306s 31.366s 31.654s
new: 30.231s 30.360s 30.451s
Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
... | ... |
@@ -27,6 +27,7 @@ |
27 | 27 |
#include "libavutil/cpu.h" |
28 | 28 |
#include "libavutil/mem.h" |
29 | 29 |
#include "libavutil/thread.h" |
30 |
+#include "libavutil/slicethread.h" |
|
30 | 31 |
|
31 | 32 |
#include "avfilter.h" |
32 | 33 |
#include "internal.h" |
... | ... |
@@ -34,85 +35,26 @@ |
34 | 34 |
|
35 | 35 |
typedef struct ThreadContext { |
36 | 36 |
AVFilterGraph *graph; |
37 |
- |
|
38 |
- int nb_threads; |
|
39 |
- pthread_t *workers; |
|
37 |
+ AVSliceThread *thread; |
|
40 | 38 |
avfilter_action_func *func; |
41 | 39 |
|
42 | 40 |
/* per-execute parameters */ |
43 | 41 |
AVFilterContext *ctx; |
44 | 42 |
void *arg; |
45 | 43 |
int *rets; |
46 |
- int nb_jobs; |
|
47 |
- |
|
48 |
- pthread_cond_t last_job_cond; |
|
49 |
- pthread_cond_t current_job_cond; |
|
50 |
- pthread_mutex_t current_job_lock; |
|
51 |
- int current_job; |
|
52 |
- unsigned int current_execute; |
|
53 |
- int done; |
|
54 | 44 |
} ThreadContext; |
55 | 45 |
|
56 |
-static void* attribute_align_arg worker(void *v) |
|
46 |
+static void worker_func(void *priv, int jobnr, int threadnr, int nb_jobs, int nb_threads) |
|
57 | 47 |
{ |
58 |
- ThreadContext *c = v; |
|
59 |
- int our_job = c->nb_jobs; |
|
60 |
- int nb_threads = c->nb_threads; |
|
61 |
- unsigned int last_execute = 0; |
|
62 |
- int ret, self_id; |
|
63 |
- |
|
64 |
- pthread_mutex_lock(&c->current_job_lock); |
|
65 |
- self_id = c->current_job++; |
|
66 |
- |
|
67 |
- for (;;) { |
|
68 |
- while (our_job >= c->nb_jobs) { |
|
69 |
- if (c->current_job == nb_threads + c->nb_jobs) |
|
70 |
- pthread_cond_signal(&c->last_job_cond); |
|
71 |
- |
|
72 |
- while (last_execute == c->current_execute && !c->done) |
|
73 |
- pthread_cond_wait(&c->current_job_cond, &c->current_job_lock); |
|
74 |
- last_execute = c->current_execute; |
|
75 |
- our_job = self_id; |
|
76 |
- |
|
77 |
- if (c->done) { |
|
78 |
- pthread_mutex_unlock(&c->current_job_lock); |
|
79 |
- return NULL; |
|
80 |
- } |
|
81 |
- } |
|
82 |
- pthread_mutex_unlock(&c->current_job_lock); |
|
83 |
- |
|
84 |
- ret = c->func(c->ctx, c->arg, our_job, c->nb_jobs); |
|
85 |
- if (c->rets) |
|
86 |
- c->rets[our_job % c->nb_jobs] = ret; |
|
87 |
- |
|
88 |
- pthread_mutex_lock(&c->current_job_lock); |
|
89 |
- our_job = c->current_job++; |
|
90 |
- } |
|
48 |
+ ThreadContext *c = priv; |
|
49 |
+ int ret = c->func(c->ctx, c->arg, jobnr, nb_jobs); |
|
50 |
+ if (c->rets) |
|
51 |
+ c->rets[jobnr] = ret; |
|
91 | 52 |
} |
92 | 53 |
|
93 | 54 |
static void slice_thread_uninit(ThreadContext *c) |
94 | 55 |
{ |
95 |
- int i; |
|
96 |
- |
|
97 |
- pthread_mutex_lock(&c->current_job_lock); |
|
98 |
- c->done = 1; |
|
99 |
- pthread_cond_broadcast(&c->current_job_cond); |
|
100 |
- pthread_mutex_unlock(&c->current_job_lock); |
|
101 |
- |
|
102 |
- for (i = 0; i < c->nb_threads; i++) |
|
103 |
- pthread_join(c->workers[i], NULL); |
|
104 |
- |
|
105 |
- pthread_mutex_destroy(&c->current_job_lock); |
|
106 |
- pthread_cond_destroy(&c->current_job_cond); |
|
107 |
- pthread_cond_destroy(&c->last_job_cond); |
|
108 |
- av_freep(&c->workers); |
|
109 |
-} |
|
110 |
- |
|
111 |
-static void slice_thread_park_workers(ThreadContext *c) |
|
112 |
-{ |
|
113 |
- while (c->current_job != c->nb_threads + c->nb_jobs) |
|
114 |
- pthread_cond_wait(&c->last_job_cond, &c->current_job_lock); |
|
115 |
- pthread_mutex_unlock(&c->current_job_lock); |
|
56 |
+ avpriv_slicethread_free(&c->thread); |
|
116 | 57 |
} |
117 | 58 |
|
118 | 59 |
static int thread_execute(AVFilterContext *ctx, avfilter_action_func *func, |
... | ... |
@@ -122,67 +64,21 @@ static int thread_execute(AVFilterContext *ctx, avfilter_action_func *func, |
122 | 122 |
|
123 | 123 |
if (nb_jobs <= 0) |
124 | 124 |
return 0; |
125 |
- |
|
126 |
- pthread_mutex_lock(&c->current_job_lock); |
|
127 |
- |
|
128 |
- c->current_job = c->nb_threads; |
|
129 |
- c->nb_jobs = nb_jobs; |
|
130 | 125 |
c->ctx = ctx; |
131 | 126 |
c->arg = arg; |
132 | 127 |
c->func = func; |
133 | 128 |
c->rets = ret; |
134 |
- c->current_execute++; |
|
135 |
- |
|
136 |
- pthread_cond_broadcast(&c->current_job_cond); |
|
137 |
- |
|
138 |
- slice_thread_park_workers(c); |
|
139 | 129 |
|
130 |
+ avpriv_slicethread_execute(c->thread, nb_jobs, 0); |
|
140 | 131 |
return 0; |
141 | 132 |
} |
142 | 133 |
|
143 | 134 |
static int thread_init_internal(ThreadContext *c, int nb_threads) |
144 | 135 |
{ |
145 |
- int i, ret; |
|
146 |
- |
|
147 |
- if (!nb_threads) { |
|
148 |
- int nb_cpus = av_cpu_count(); |
|
149 |
- // use number of cores + 1 as thread count if there is more than one |
|
150 |
- if (nb_cpus > 1) |
|
151 |
- nb_threads = nb_cpus + 1; |
|
152 |
- else |
|
153 |
- nb_threads = 1; |
|
154 |
- } |
|
155 |
- |
|
136 |
+ nb_threads = avpriv_slicethread_create(&c->thread, c, worker_func, NULL, nb_threads); |
|
156 | 137 |
if (nb_threads <= 1) |
157 |
- return 1; |
|
158 |
- |
|
159 |
- c->nb_threads = nb_threads; |
|
160 |
- c->workers = av_mallocz_array(sizeof(*c->workers), nb_threads); |
|
161 |
- if (!c->workers) |
|
162 |
- return AVERROR(ENOMEM); |
|
163 |
- |
|
164 |
- c->current_job = 0; |
|
165 |
- c->nb_jobs = 0; |
|
166 |
- c->done = 0; |
|
167 |
- |
|
168 |
- pthread_cond_init(&c->current_job_cond, NULL); |
|
169 |
- pthread_cond_init(&c->last_job_cond, NULL); |
|
170 |
- |
|
171 |
- pthread_mutex_init(&c->current_job_lock, NULL); |
|
172 |
- pthread_mutex_lock(&c->current_job_lock); |
|
173 |
- for (i = 0; i < nb_threads; i++) { |
|
174 |
- ret = pthread_create(&c->workers[i], NULL, worker, c); |
|
175 |
- if (ret) { |
|
176 |
- pthread_mutex_unlock(&c->current_job_lock); |
|
177 |
- c->nb_threads = i; |
|
178 |
- slice_thread_uninit(c); |
|
179 |
- return AVERROR(ret); |
|
180 |
- } |
|
181 |
- } |
|
182 |
- |
|
183 |
- slice_thread_park_workers(c); |
|
184 |
- |
|
185 |
- return c->nb_threads; |
|
138 |
+ avpriv_slicethread_free(&c->thread); |
|
139 |
+ return FFMAX(nb_threads, 1); |
|
186 | 140 |
} |
187 | 141 |
|
188 | 142 |
int ff_graph_thread_init(AVFilterGraph *graph) |