It provides the following features:
* verify correctness by comparing output to the C version.
* detect failure to save and restore clobbered callee-saved registers.
* detect 32-bit parameters being used as if they were 64-bit in x86-64
(the upper halves are not guaranteed to be zero - but in practice
they very often are, which makes those bugs hard to spot otherwise).
* easy benchmarking.
Compile by running 'make checkasm'.
Execute by running 'tests/checkasm/checkasm'.
Optional arguments are '--bench' to run benchmarks for all functions,
'--bench=<pattern>' to run benchmarks for all functions that start with
<pattern>, and '<integer>' to seed the PRNG for reproducible results.
Contains unit tests for most h264pred functions to get started, more tests
can be added afterwards using those as a reference.
Loosely based on code from x264. Currently only supports x86 and x86-64,
but additional architectures shouldn't be too much of an obstacle to add.
Note that functions with floating point parameters or floating point
return values are not supported. Some compiler-specific features or
preprocessor hacks would likely be required to add support for that.
Signed-off-by: Janne Grunau <janne-libav@jannau.net>
178 | 180 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,33 @@ |
0 |
+# libavcodec tests |
|
1 |
+AVCODECOBJS-$(CONFIG_H264PRED) += h264pred.o |
|
2 |
+ |
|
3 |
+CHECKASMOBJS-$(CONFIG_AVCODEC) += $(AVCODECOBJS-yes) |
|
4 |
+ |
|
5 |
+ |
|
6 |
+-include $(SRC_PATH)/tests/checkasm/$(ARCH)/Makefile |
|
7 |
+ |
|
8 |
+CHECKASMOBJS += $(CHECKASMOBJS-yes) checkasm.o |
|
9 |
+CHECKASMOBJS := $(sort $(CHECKASMOBJS:%=tests/checkasm/%)) |
|
10 |
+ |
|
11 |
+-include $(CHECKASMOBJS:.o=.d) |
|
12 |
+ |
|
13 |
+CHECKASMDIRS := $(sort $(dir $(CHECKASMOBJS))) |
|
14 |
+$(CHECKASMOBJS): | $(CHECKASMDIRS) |
|
15 |
+OBJDIRS += $(CHECKASMDIRS) |
|
16 |
+ |
|
17 |
+# We rely on function pointers intentionally declared without specified argument types. |
|
18 |
+tests/checkasm/%.o: CFLAGS := $(CFLAGS:-Wstrict-prototypes=-Wno-strict-prototypes) |
|
19 |
+ |
|
20 |
+CHECKASM := tests/checkasm/checkasm$(EXESUF) |
|
21 |
+ |
|
22 |
+$(CHECKASM): $(EXEOBJS) $(CHECKASMOBJS) $(FF_DEP_LIBS) |
|
23 |
+ $(LD) $(LDFLAGS) $(LDEXEFLAGS) $(LD_O) $(CHECKASMOBJS) $(FF_EXTRALIBS) |
|
24 |
+ |
|
25 |
+checkasm: $(CHECKASM) |
|
26 |
+ |
|
27 |
+clean:: checkasmclean |
|
28 |
+ |
|
29 |
# Remove the checkasm executable together with the files matching
# $(CLEANSUFFIXES) in tests/checkasm and its $(ARCH) subdirectory.
checkasmclean:
	$(RM) $(CHECKASM) $(CLEANSUFFIXES:%=tests/checkasm/%) $(CLEANSUFFIXES:%=tests/checkasm/$(ARCH)/%)

# Neither target produces a file of the same name; declaring both phony keeps
# a stray file called "checkasm" or "checkasmclean" from short-circuiting them.
.PHONY: checkasm checkasmclean
0 | 33 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,484 @@ |
0 |
+/* |
|
1 |
+ * Assembly testing and benchmarking tool |
|
2 |
+ * Copyright (c) 2015 Henrik Gramner |
|
3 |
+ * Copyright (c) 2008 Loren Merritt |
|
4 |
+ * |
|
5 |
+ * This file is part of Libav. |
|
6 |
+ * |
|
7 |
+ * Libav is free software; you can redistribute it and/or modify |
|
8 |
+ * it under the terms of the GNU General Public License as published by |
|
9 |
+ * the Free Software Foundation; either version 2 of the License, or |
|
10 |
+ * (at your option) any later version. |
|
11 |
+ * |
|
12 |
+ * Libav is distributed in the hope that it will be useful, |
|
13 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
14 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
15 |
+ * GNU General Public License for more details. |
|
16 |
+ * |
|
17 |
+ * You should have received a copy of the GNU General Public License along |
|
18 |
+ * with Libav; if not, write to the Free Software Foundation, Inc., |
|
19 |
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 |
+ */ |
|
21 |
+ |
|
22 |
+#include <stdarg.h> |
|
23 |
+#include <stdio.h> |
|
24 |
+#include <stdlib.h> |
|
25 |
+#include <string.h> |
|
26 |
+#include "checkasm.h" |
|
27 |
+#include "libavutil/common.h" |
|
28 |
+#include "libavutil/cpu.h" |
|
29 |
+#include "libavutil/random_seed.h" |
|
30 |
+ |
|
31 |
+#if ARCH_X86 |
|
32 |
+#include "libavutil/x86/cpu.h" |
|
33 |
+#endif |
|
34 |
+ |
|
35 |
+#if HAVE_SETCONSOLETEXTATTRIBUTE |
|
36 |
+#include <windows.h> |
|
37 |
+#define COLOR_RED FOREGROUND_RED |
|
38 |
+#define COLOR_GREEN FOREGROUND_GREEN |
|
39 |
+#define COLOR_YELLOW (FOREGROUND_RED|FOREGROUND_GREEN) |
|
40 |
+#else |
|
41 |
+#define COLOR_RED 1 |
|
42 |
+#define COLOR_GREEN 2 |
|
43 |
+#define COLOR_YELLOW 3 |
|
44 |
+#endif |
|
45 |
+ |
|
46 |
+#if HAVE_UNISTD_H |
|
47 |
+#include <unistd.h> |
|
48 |
+#endif |
|
49 |
+ |
|
50 |
+#if !HAVE_ISATTY |
|
51 |
+#define isatty(fd) 1 |
|
52 |
+#endif |
|
53 |
+ |
|
54 |
+/* List of tests to invoke */ |
|
55 |
+static void (* const tests[])(void) = { |
|
56 |
+#if CONFIG_H264PRED |
|
57 |
+ checkasm_check_h264pred, |
|
58 |
+#endif |
|
59 |
+ NULL |
|
60 |
+}; |
|
61 |
+ |
|
62 |
+/* List of cpu flags to check */ |
|
63 |
+static const struct { |
|
64 |
+ const char *name; |
|
65 |
+ const char *suffix; |
|
66 |
+ int flag; |
|
67 |
+} cpus[] = { |
|
68 |
+#if ARCH_X86 |
|
69 |
+ { "MMX", "mmx", AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV }, |
|
70 |
+ { "MMXEXT", "mmxext", AV_CPU_FLAG_MMXEXT }, |
|
71 |
+ { "3DNOW", "3dnow", AV_CPU_FLAG_3DNOW }, |
|
72 |
+ { "3DNOWEXT", "3dnowext", AV_CPU_FLAG_3DNOWEXT }, |
|
73 |
+ { "SSE", "sse", AV_CPU_FLAG_SSE }, |
|
74 |
+ { "SSE2", "sse2", AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW }, |
|
75 |
+ { "SSE3", "sse3", AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW }, |
|
76 |
+ { "SSSE3", "ssse3", AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM }, |
|
77 |
+ { "SSE4.1", "sse4", AV_CPU_FLAG_SSE4 }, |
|
78 |
+ { "SSE4.2", "sse42", AV_CPU_FLAG_SSE42 }, |
|
79 |
+ { "AVX", "avx", AV_CPU_FLAG_AVX }, |
|
80 |
+ { "XOP", "xop", AV_CPU_FLAG_XOP }, |
|
81 |
+ { "FMA3", "fma3", AV_CPU_FLAG_FMA3 }, |
|
82 |
+ { "FMA4", "fma4", AV_CPU_FLAG_FMA4 }, |
|
83 |
+ { "AVX2", "avx2", AV_CPU_FLAG_AVX2 }, |
|
84 |
+#endif |
|
85 |
+ { NULL } |
|
86 |
+}; |
|
87 |
+ |
|
88 |
+typedef struct CheckasmFuncVersion { |
|
89 |
+ struct CheckasmFuncVersion *next; |
|
90 |
+ intptr_t (*func)(); |
|
91 |
+ int ok; |
|
92 |
+ int cpu; |
|
93 |
+ int iterations; |
|
94 |
+ uint64_t cycles; |
|
95 |
+} CheckasmFuncVersion; |
|
96 |
+ |
|
97 |
+/* Binary search tree node */ |
|
98 |
+typedef struct CheckasmFunc { |
|
99 |
+ struct CheckasmFunc *child[2]; |
|
100 |
+ CheckasmFuncVersion versions; |
|
101 |
+ char name[1]; |
|
102 |
+} CheckasmFunc; |
|
103 |
+ |
|
104 |
+/* Internal state */ |
|
105 |
+static struct { |
|
106 |
+ CheckasmFunc *funcs; |
|
107 |
+ CheckasmFunc *current_func; |
|
108 |
+ CheckasmFuncVersion *current_func_ver; |
|
109 |
+ const char *bench_pattern; |
|
110 |
+ int bench_pattern_len; |
|
111 |
+ int num_checked; |
|
112 |
+ int num_failed; |
|
113 |
+ int nop_time; |
|
114 |
+ int cpu_flag; |
|
115 |
+ const char *cpu_flag_name; |
|
116 |
+} state; |
|
117 |
+ |
|
118 |
+/* PRNG state */ |
|
119 |
+AVLFG checkasm_lfg; |
|
120 |
+ |
|
121 |
/* printf-style output to stderr, wrapped in the requested color whenever the
 * output device was found to support it. The capability probe runs once, on
 * the first call, and its result is cached for all later calls. */
static void color_printf(int color, const char *fmt, ...)
{
    /* -1: not probed yet, 0: plain output, 1: colored output */
    static int use_color = -1;
    va_list arg;

#if HAVE_SETCONSOLETEXTATTRIBUTE
    static HANDLE con;
    static WORD org_attributes;

    if (use_color < 0) {
        CONSOLE_SCREEN_BUFFER_INFO con_info;

        con       = GetStdHandle(STD_ERROR_HANDLE);
        use_color = 0;
        if (con && con != INVALID_HANDLE_VALUE &&
            GetConsoleScreenBufferInfo(con, &con_info)) {
            /* Remember the current attributes so they can be restored below */
            org_attributes = con_info.wAttributes;
            use_color      = 1;
        }
    }

    if (use_color)
        SetConsoleTextAttribute(con, (org_attributes & 0xfff0) | (color & 0x0f));
#else
    if (use_color < 0) {
        const char *term = getenv("TERM");

        /* Color needs a TERM that is set and not "dumb", and a tty on fd 2 */
        use_color = term && strcmp(term, "dumb") && isatty(2);
    }

    if (use_color)
        fprintf(stderr, "\x1b[%d;3%dm", (color & 0x08) >> 3, color & 0x07);
#endif

    va_start(arg, fmt);
    vfprintf(stderr, fmt, arg);
    va_end(arg);

    if (!use_color)
        return;

    /* Switch back to the attributes that were active before this call */
#if HAVE_SETCONSOLETEXTATTRIBUTE
    SetConsoleTextAttribute(con, org_attributes);
#else
    fprintf(stderr, "\x1b[0m");
#endif
}
|
163 |
+ |
|
164 |
+/* Deallocate a tree */ |
|
165 |
+static void destroy_func_tree(CheckasmFunc *f) |
|
166 |
+{ |
|
167 |
+ if (f) { |
|
168 |
+ CheckasmFuncVersion *v = f->versions.next; |
|
169 |
+ while (v) { |
|
170 |
+ CheckasmFuncVersion *next = v->next; |
|
171 |
+ free(v); |
|
172 |
+ v = next; |
|
173 |
+ } |
|
174 |
+ |
|
175 |
+ destroy_func_tree(f->child[0]); |
|
176 |
+ destroy_func_tree(f->child[1]); |
|
177 |
+ free(f); |
|
178 |
+ } |
|
179 |
+} |
|
180 |
+ |
|
181 |
+/* Allocate a zero-initialized block, clean up and exit on failure */ |
|
182 |
+static void *checkasm_malloc(size_t size) |
|
183 |
+{ |
|
184 |
+ void *ptr = calloc(1, size); |
|
185 |
+ if (!ptr) { |
|
186 |
+ fprintf(stderr, "checkasm: malloc failed\n"); |
|
187 |
+ destroy_func_tree(state.funcs); |
|
188 |
+ exit(1); |
|
189 |
+ } |
|
190 |
+ return ptr; |
|
191 |
+} |
|
192 |
+ |
|
193 |
+/* Get the suffix of the specified cpu flag */ |
|
194 |
+static const char *cpu_suffix(int cpu) |
|
195 |
+{ |
|
196 |
+ int i = FF_ARRAY_ELEMS(cpus); |
|
197 |
+ |
|
198 |
+ while (--i >= 0) |
|
199 |
+ if (cpu & cpus[i].flag) |
|
200 |
+ return cpus[i].suffix; |
|
201 |
+ |
|
202 |
+ return "c"; |
|
203 |
+} |
|
204 |
+ |
|
205 |
+#ifdef AV_READ_TIME |
|
206 |
/* qsort() comparator ordering uint16_t samples in ascending order */
static int cmp_nop(const void *a, const void *b)
{
    const uint16_t lhs = *(const uint16_t *)a;
    const uint16_t rhs = *(const uint16_t *)b;

    /* Both operands are promoted to int, so the difference cannot wrap */
    return lhs - rhs;
}
|
210 |
+ |
|
211 |
+/* Measure the overhead of the timing code (in decicycles) */ |
|
212 |
+static int measure_nop_time(void) |
|
213 |
+{ |
|
214 |
+ uint16_t nops[10000]; |
|
215 |
+ int i, nop_sum = 0; |
|
216 |
+ |
|
217 |
+ for (i = 0; i < 10000; i++) { |
|
218 |
+ uint64_t t = AV_READ_TIME(); |
|
219 |
+ nops[i] = AV_READ_TIME() - t; |
|
220 |
+ } |
|
221 |
+ |
|
222 |
+ qsort(nops, 10000, sizeof(uint16_t), cmp_nop); |
|
223 |
+ for (i = 2500; i < 7500; i++) |
|
224 |
+ nop_sum += nops[i]; |
|
225 |
+ |
|
226 |
+ return nop_sum / 500; |
|
227 |
+} |
|
228 |
+ |
|
229 |
+/* Print benchmark results */ |
|
230 |
+static void print_benchs(CheckasmFunc *f) |
|
231 |
+{ |
|
232 |
+ if (f) { |
|
233 |
+ print_benchs(f->child[0]); |
|
234 |
+ |
|
235 |
+ /* Only print functions with at least one assembly version */ |
|
236 |
+ if (f->versions.cpu || f->versions.next) { |
|
237 |
+ CheckasmFuncVersion *v = &f->versions; |
|
238 |
+ do { |
|
239 |
+ if (v->iterations) { |
|
240 |
+ int decicycles = (10*v->cycles/v->iterations - state.nop_time) / 4; |
|
241 |
+ printf("%s_%s: %d.%d\n", f->name, cpu_suffix(v->cpu), decicycles/10, decicycles%10); |
|
242 |
+ } |
|
243 |
+ } while ((v = v->next)); |
|
244 |
+ } |
|
245 |
+ |
|
246 |
+ print_benchs(f->child[1]); |
|
247 |
+ } |
|
248 |
+} |
|
249 |
+#endif |
|
250 |
+ |
|
251 |
/* ASCIIbetical sort except preserving natural order for numbers.
 *
 * Returns a negative, zero or positive value if 'a' sorts before, equal to
 * or after 'b'. Strings are compared byte by byte, except that when the
 * first mismatch lies inside a run of digits the number with more digits is
 * considered larger (so "x2" sorts before "x10"). The natural-order rule is
 * only applied when the mismatch really is part of a number; a digit that
 * merely happens to face a non-digit (e.g. "dc_128" vs. "dc_rv40") is
 * ordered by its byte value like any other character. */
static int cmp_func_names(const char *a, const char *b)
{
    const char *start = a;
    int ascii_diff, digit_diff;

    /* Skip the common prefix. Bytes are compared as unsigned char so that
     * the result does not depend on the signedness of plain char. */
    for (; !(ascii_diff = *(const unsigned char *)a - *(const unsigned char *)b) && *a; a++, b++);

    /* Skip the digits both strings still have after the mismatch; whichever
     * string has digits left afterwards holds the longer number. */
    for (; av_isdigit(*a) && av_isdigit(*b); a++, b++);

    /* Only let the digit count decide if the position is preceded by a digit,
     * i.e. if the compared characters belong to a number. */
    if (a > start && av_isdigit(a[-1]) && (digit_diff = av_isdigit(*a) - av_isdigit(*b)))
        return digit_diff;

    return ascii_diff;
}
|
261 |
+ |
|
262 |
+/* Get a node with the specified name, creating it if it doesn't exist */ |
|
263 |
+static CheckasmFunc *get_func(const char *name, int length) |
|
264 |
+{ |
|
265 |
+ CheckasmFunc *f, **f_ptr = &state.funcs; |
|
266 |
+ |
|
267 |
+ /* Search the tree for a matching node */ |
|
268 |
+ while ((f = *f_ptr)) { |
|
269 |
+ int cmp = cmp_func_names(name, f->name); |
|
270 |
+ if (!cmp) |
|
271 |
+ return f; |
|
272 |
+ |
|
273 |
+ f_ptr = &f->child[(cmp > 0)]; |
|
274 |
+ } |
|
275 |
+ |
|
276 |
+ /* Allocate and insert a new node into the tree */ |
|
277 |
+ f = *f_ptr = checkasm_malloc(sizeof(CheckasmFunc) + length); |
|
278 |
+ memcpy(f->name, name, length+1); |
|
279 |
+ |
|
280 |
+ return f; |
|
281 |
+} |
|
282 |
+ |
|
283 |
+/* Perform tests and benchmarks for the specified cpu flag if supported by the host */ |
|
284 |
+static void check_cpu_flag(const char *name, int flag) |
|
285 |
+{ |
|
286 |
+ int old_cpu_flag = state.cpu_flag; |
|
287 |
+ |
|
288 |
+ flag |= old_cpu_flag; |
|
289 |
+ av_set_cpu_flags_mask(flag); |
|
290 |
+ state.cpu_flag = av_get_cpu_flags(); |
|
291 |
+ |
|
292 |
+ if (!flag || state.cpu_flag != old_cpu_flag) { |
|
293 |
+ int i; |
|
294 |
+ |
|
295 |
+ state.cpu_flag_name = name; |
|
296 |
+ for (i = 0; tests[i]; i++) |
|
297 |
+ tests[i](); |
|
298 |
+ } |
|
299 |
+} |
|
300 |
+ |
|
301 |
+/* Print the name of the current CPU flag, but only do it once */ |
|
302 |
+static void print_cpu_name(void) |
|
303 |
+{ |
|
304 |
+ if (state.cpu_flag_name) { |
|
305 |
+ color_printf(COLOR_YELLOW, "%s:\n", state.cpu_flag_name); |
|
306 |
+ state.cpu_flag_name = NULL; |
|
307 |
+ } |
|
308 |
+} |
|
309 |
+ |
|
310 |
+int main(int argc, char *argv[]) |
|
311 |
+{ |
|
312 |
+ int i, seed, ret = 0; |
|
313 |
+ |
|
314 |
+ if (!tests[0] || !cpus[0].flag) { |
|
315 |
+ fprintf(stderr, "checkasm: no tests to perform\n"); |
|
316 |
+ return 1; |
|
317 |
+ } |
|
318 |
+ |
|
319 |
+ if (argc > 1 && !strncmp(argv[1], "--bench", 7)) { |
|
320 |
+#ifndef AV_READ_TIME |
|
321 |
+ fprintf(stderr, "checkasm: --bench is not supported on your system\n"); |
|
322 |
+ return 1; |
|
323 |
+#endif |
|
324 |
+ if (argv[1][7] == '=') { |
|
325 |
+ state.bench_pattern = argv[1] + 8; |
|
326 |
+ state.bench_pattern_len = strlen(state.bench_pattern); |
|
327 |
+ } else |
|
328 |
+ state.bench_pattern = ""; |
|
329 |
+ |
|
330 |
+ argc--; |
|
331 |
+ argv++; |
|
332 |
+ } |
|
333 |
+ |
|
334 |
+ seed = (argc > 1) ? atoi(argv[1]) : av_get_random_seed(); |
|
335 |
+ fprintf(stderr, "checkasm: using random seed %u\n", seed); |
|
336 |
+ av_lfg_init(&checkasm_lfg, seed); |
|
337 |
+ |
|
338 |
+ check_cpu_flag(NULL, 0); |
|
339 |
+ for (i = 0; cpus[i].flag; i++) |
|
340 |
+ check_cpu_flag(cpus[i].name, cpus[i].flag); |
|
341 |
+ |
|
342 |
+ if (state.num_failed) { |
|
343 |
+ fprintf(stderr, "checkasm: %d of %d tests have failed\n", state.num_failed, state.num_checked); |
|
344 |
+ ret = 1; |
|
345 |
+ } else { |
|
346 |
+ fprintf(stderr, "checkasm: all %d tests passed\n", state.num_checked); |
|
347 |
+#ifdef AV_READ_TIME |
|
348 |
+ if (state.bench_pattern) { |
|
349 |
+ state.nop_time = measure_nop_time(); |
|
350 |
+ printf("nop: %d.%d\n", state.nop_time/10, state.nop_time%10); |
|
351 |
+ print_benchs(state.funcs); |
|
352 |
+ } |
|
353 |
+#endif |
|
354 |
+ } |
|
355 |
+ |
|
356 |
+ destroy_func_tree(state.funcs); |
|
357 |
+ return ret; |
|
358 |
+} |
|
359 |
+ |
|
360 |
+/* Decide whether or not the specified function needs to be tested and |
|
361 |
+ * allocate/initialize data structures if needed. Returns a pointer to a |
|
362 |
+ * reference function if the function should be tested, otherwise NULL */ |
|
363 |
+intptr_t (*checkasm_check_func(intptr_t (*func)(), const char *name, ...))() |
|
364 |
+{ |
|
365 |
+ char name_buf[256]; |
|
366 |
+ intptr_t (*ref)() = func; |
|
367 |
+ CheckasmFuncVersion *v; |
|
368 |
+ int name_length; |
|
369 |
+ va_list arg; |
|
370 |
+ |
|
371 |
+ va_start(arg, name); |
|
372 |
+ name_length = vsnprintf(name_buf, sizeof(name_buf), name, arg); |
|
373 |
+ va_end(arg); |
|
374 |
+ |
|
375 |
+ if (!func || name_length <= 0 || name_length >= sizeof(name_buf)) |
|
376 |
+ return NULL; |
|
377 |
+ |
|
378 |
+ state.current_func = get_func(name_buf, name_length); |
|
379 |
+ v = &state.current_func->versions; |
|
380 |
+ |
|
381 |
+ if (v->func) { |
|
382 |
+ CheckasmFuncVersion *prev; |
|
383 |
+ do { |
|
384 |
+ /* Only test functions that haven't already been tested */ |
|
385 |
+ if (v->func == func) |
|
386 |
+ return NULL; |
|
387 |
+ |
|
388 |
+ if (v->ok) |
|
389 |
+ ref = v->func; |
|
390 |
+ |
|
391 |
+ prev = v; |
|
392 |
+ } while ((v = v->next)); |
|
393 |
+ |
|
394 |
+ v = prev->next = checkasm_malloc(sizeof(CheckasmFuncVersion)); |
|
395 |
+ } |
|
396 |
+ |
|
397 |
+ v->func = func; |
|
398 |
+ v->ok = 1; |
|
399 |
+ v->cpu = state.cpu_flag; |
|
400 |
+ state.current_func_ver = v; |
|
401 |
+ |
|
402 |
+ if (state.cpu_flag) |
|
403 |
+ state.num_checked++; |
|
404 |
+ |
|
405 |
+ return ref; |
|
406 |
+} |
|
407 |
+ |
|
408 |
+/* Decide whether or not the current function needs to be benchmarked */ |
|
409 |
+int checkasm_bench_func(void) |
|
410 |
+{ |
|
411 |
+ return !state.num_failed && state.bench_pattern && |
|
412 |
+ !strncmp(state.current_func->name, state.bench_pattern, state.bench_pattern_len); |
|
413 |
+} |
|
414 |
+ |
|
415 |
+/* Indicate that the current test has failed */ |
|
416 |
+void checkasm_fail_func(const char *msg, ...) |
|
417 |
+{ |
|
418 |
+ if (state.current_func_ver->cpu && state.current_func_ver->ok) { |
|
419 |
+ va_list arg; |
|
420 |
+ |
|
421 |
+ print_cpu_name(); |
|
422 |
+ fprintf(stderr, " %s_%s (", state.current_func->name, cpu_suffix(state.current_func_ver->cpu)); |
|
423 |
+ va_start(arg, msg); |
|
424 |
+ vfprintf(stderr, msg, arg); |
|
425 |
+ va_end(arg); |
|
426 |
+ fprintf(stderr, ")\n"); |
|
427 |
+ |
|
428 |
+ state.current_func_ver->ok = 0; |
|
429 |
+ state.num_failed++; |
|
430 |
+ } |
|
431 |
+} |
|
432 |
+ |
|
433 |
+/* Update benchmark results of the current function */ |
|
434 |
+void checkasm_update_bench(int iterations, uint64_t cycles) |
|
435 |
+{ |
|
436 |
+ state.current_func_ver->iterations += iterations; |
|
437 |
+ state.current_func_ver->cycles += cycles; |
|
438 |
+} |
|
439 |
+ |
|
440 |
+/* Print the outcome of all tests performed since the last time this function was called */ |
|
441 |
+void checkasm_report(const char *name, ...) |
|
442 |
+{ |
|
443 |
+ static int prev_checked, prev_failed, max_length; |
|
444 |
+ |
|
445 |
+ if (state.num_checked > prev_checked) { |
|
446 |
+ print_cpu_name(); |
|
447 |
+ |
|
448 |
+ if (*name) { |
|
449 |
+ int pad_length = max_length; |
|
450 |
+ va_list arg; |
|
451 |
+ |
|
452 |
+ fprintf(stderr, " - "); |
|
453 |
+ va_start(arg, name); |
|
454 |
+ pad_length -= vfprintf(stderr, name, arg); |
|
455 |
+ va_end(arg); |
|
456 |
+ fprintf(stderr, "%*c", FFMAX(pad_length, 0) + 2, '['); |
|
457 |
+ } else |
|
458 |
+ fprintf(stderr, " - %-*s [", max_length, state.current_func->name); |
|
459 |
+ |
|
460 |
+ if (state.num_failed == prev_failed) |
|
461 |
+ color_printf(COLOR_GREEN, "OK"); |
|
462 |
+ else |
|
463 |
+ color_printf(COLOR_RED, "FAILED"); |
|
464 |
+ fprintf(stderr, "]\n"); |
|
465 |
+ |
|
466 |
+ prev_checked = state.num_checked; |
|
467 |
+ prev_failed = state.num_failed; |
|
468 |
+ } else if (!state.cpu_flag) { |
|
469 |
+ int length; |
|
470 |
+ |
|
471 |
+ /* Calculate the amount of padding required to make the output vertically aligned */ |
|
472 |
+ if (*name) { |
|
473 |
+ va_list arg; |
|
474 |
+ va_start(arg, name); |
|
475 |
+ length = vsnprintf(NULL, 0, name, arg); |
|
476 |
+ va_end(arg); |
|
477 |
+ } else |
|
478 |
+ length = strlen(state.current_func->name); |
|
479 |
+ |
|
480 |
+ if (length > max_length) |
|
481 |
+ max_length = length; |
|
482 |
+ } |
|
483 |
+} |
0 | 484 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,115 @@ |
0 |
+/* |
|
1 |
+ * Assembly testing and benchmarking tool |
|
2 |
+ * Copyright (c) 2015 Henrik Gramner |
|
3 |
+ * Copyright (c) 2008 Loren Merritt |
|
4 |
+ * |
|
5 |
+ * This file is part of Libav. |
|
6 |
+ * |
|
7 |
+ * Libav is free software; you can redistribute it and/or modify |
|
8 |
+ * it under the terms of the GNU General Public License as published by |
|
9 |
+ * the Free Software Foundation; either version 2 of the License, or |
|
10 |
+ * (at your option) any later version. |
|
11 |
+ * |
|
12 |
+ * Libav is distributed in the hope that it will be useful, |
|
13 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
14 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
15 |
+ * GNU General Public License for more details. |
|
16 |
+ * |
|
17 |
+ * You should have received a copy of the GNU General Public License along |
|
18 |
+ * with Libav; if not, write to the Free Software Foundation, Inc., |
|
19 |
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 |
+ */ |
|
21 |
+ |
|
22 |
+#ifndef CHECKASM_H |
|
23 |
+#define CHECKASM_H |
|
24 |
+ |
|
25 |
+#include <stdint.h> |
|
26 |
+#include "config.h" |
|
27 |
+#include "libavutil/avstring.h" |
|
28 |
+#include "libavutil/lfg.h" |
|
29 |
+#include "libavutil/timer.h" |
|
30 |
+ |
|
31 |
+void checkasm_check_h264pred(void); |
|
32 |
+ |
|
33 |
+intptr_t (*checkasm_check_func(intptr_t (*func)(), const char *name, ...))() av_printf_format(2, 3); |
|
34 |
+int checkasm_bench_func(void); |
|
35 |
+void checkasm_fail_func(const char *msg, ...) av_printf_format(1, 2); |
|
36 |
+void checkasm_update_bench(int iterations, uint64_t cycles); |
|
37 |
+void checkasm_report(const char *name, ...) av_printf_format(1, 2); |
|
38 |
+ |
|
39 |
+extern AVLFG checkasm_lfg; |
|
40 |
+#define rnd() av_lfg_get(&checkasm_lfg) |
|
41 |
+ |
|
42 |
+static av_unused intptr_t (*func_ref)(); |
|
43 |
+static av_unused intptr_t (*func_new)(); |
|
44 |
+ |
|
45 |
+#define BENCH_RUNS 1000 /* Trade-off between accuracy and speed */ |
|
46 |
+ |
|
47 |
+/* Decide whether or not the specified function needs to be tested */ |
|
48 |
+#define check_func(func, ...) ((func_new = (intptr_t (*)())func) &&\ |
|
49 |
+ (func_ref = checkasm_check_func(func_new, __VA_ARGS__))) |
|
50 |
+ |
|
51 |
+/* Indicate that the current test has failed */ |
|
52 |
+#define fail() checkasm_fail_func("%s:%d", av_basename(__FILE__), __LINE__) |
|
53 |
+ |
|
54 |
+/* Print the test outcome */ |
|
55 |
+#define report(...) checkasm_report("" __VA_ARGS__) |
|
56 |
+ |
|
57 |
+/* Call the reference function */ |
|
58 |
+#define call_ref(...) func_ref(__VA_ARGS__) |
|
59 |
+ |
|
60 |
+#if ARCH_X86 && HAVE_YASM |
|
61 |
+/* Verifies that clobbered callee-saved registers are properly saved and restored */ |
|
62 |
+intptr_t checkasm_checked_call(intptr_t (*func)(), ...); |
|
63 |
+#endif |
|
64 |
+ |
|
65 |
+/* Call the function */ |
|
66 |
+#if ARCH_X86_64 && HAVE_YASM |
|
67 |
+/* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended to 64-bit. |
|
68 |
+ * This is done by clobbering the stack with junk around the stack pointer and calling the |
|
69 |
+ * assembly function through checkasm_checked_call with added dummy arguments which forces all |
|
70 |
+ * real arguments to be passed on the stack and not in registers. For 32-bit arguments the |
|
71 |
+ * upper half of the 64-bit register locations on the stack will now contain junk which will |
|
72 |
+ * cause misbehaving functions to either produce incorrect output or segfault. Note that |
|
73 |
+ * even though this works extremely well in practice, it's technically not guaranteed |
|
74 |
+ * and false negatives are theoretically possible, but there can never be any false positives. |
|
75 |
+ */ |
|
76 |
+void checkasm_stack_clobber(uint64_t clobber, ...); |
|
77 |
+#define CLOB (UINT64_C(0xdeadbeefdeadbeef)) |
|
78 |
+#define call_new(...) (checkasm_stack_clobber(CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,\ |
|
79 |
+ CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB),\ |
|
80 |
+ checkasm_checked_call(func_new, 0, 0, 0, 0, 0, __VA_ARGS__)) |
|
81 |
+#elif ARCH_X86_32 && HAVE_YASM |
|
82 |
+#define call_new(...) checkasm_checked_call(func_new, __VA_ARGS__) |
|
83 |
+#else |
|
84 |
+#define call_new(...) func_new(__VA_ARGS__) |
|
85 |
+#endif |
|
86 |
+ |
|
87 |
+/* Benchmark the function */ |
|
88 |
+#ifdef AV_READ_TIME |
|
89 |
+#define bench_new(...)\ |
|
90 |
+ do {\ |
|
91 |
+ if (checkasm_bench_func()) {\ |
|
92 |
+ intptr_t (*tfunc)() = func_new;\ |
|
93 |
+ uint64_t tsum = 0;\ |
|
94 |
+ int ti, tcount = 0;\ |
|
95 |
+ for (ti = 0; ti < BENCH_RUNS; ti++) {\ |
|
96 |
+ uint64_t t = AV_READ_TIME();\ |
|
97 |
+ tfunc(__VA_ARGS__);\ |
|
98 |
+ tfunc(__VA_ARGS__);\ |
|
99 |
+ tfunc(__VA_ARGS__);\ |
|
100 |
+ tfunc(__VA_ARGS__);\ |
|
101 |
+ t = AV_READ_TIME() - t;\ |
|
102 |
+ if (t*tcount <= tsum*4 && ti > 0) {\ |
|
103 |
+ tsum += t;\ |
|
104 |
+ tcount++;\ |
|
105 |
+ }\ |
|
106 |
+ }\ |
|
107 |
+ checkasm_update_bench(tcount, tsum);\ |
|
108 |
+ }\ |
|
109 |
+ } while (0) |
|
110 |
+#else |
|
111 |
+#define bench_new(...) |
|
112 |
+#endif |
|
113 |
+ |
|
114 |
+#endif |
0 | 115 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,252 @@ |
0 |
+/* |
|
1 |
+ * Copyright (c) 2015 Henrik Gramner |
|
2 |
+ * |
|
3 |
+ * This file is part of Libav. |
|
4 |
+ * |
|
5 |
+ * Libav is free software; you can redistribute it and/or modify |
|
6 |
+ * it under the terms of the GNU General Public License as published by |
|
7 |
+ * the Free Software Foundation; either version 2 of the License, or |
|
8 |
+ * (at your option) any later version. |
|
9 |
+ * |
|
10 |
+ * Libav is distributed in the hope that it will be useful, |
|
11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
13 |
+ * GNU General Public License for more details. |
|
14 |
+ * |
|
15 |
+ * You should have received a copy of the GNU General Public License along |
|
16 |
+ * with Libav; if not, write to the Free Software Foundation, Inc., |
|
17 |
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
|
18 |
+ */ |
|
19 |
+ |
|
20 |
+#include <string.h> |
|
21 |
+#include "checkasm.h" |
|
22 |
+#include "libavcodec/avcodec.h" |
|
23 |
+#include "libavcodec/h264pred.h" |
|
24 |
+#include "libavutil/common.h" |
|
25 |
+#include "libavutil/intreadwrite.h" |
|
26 |
+ |
|
27 |
+static const int codec_ids[4] = { AV_CODEC_ID_H264, AV_CODEC_ID_VP8, AV_CODEC_ID_RV40, AV_CODEC_ID_SVQ3 }; |
|
28 |
+ |
|
29 |
+static const char * const pred4x4_modes[4][15] = { |
|
30 |
+ { /* H264 */ |
|
31 |
+ [VERT_PRED ] = "vertical", |
|
32 |
+ [HOR_PRED ] = "horizontal", |
|
33 |
+ [DC_PRED ] = "dc", |
|
34 |
+ [DIAG_DOWN_LEFT_PRED ] = "down_left", |
|
35 |
+ [DIAG_DOWN_RIGHT_PRED] = "down_right", |
|
36 |
+ [VERT_RIGHT_PRED ] = "vertical_right", |
|
37 |
+ [HOR_DOWN_PRED ] = "horizontal_right", |
|
38 |
+ [VERT_LEFT_PRED ] = "vertical_left", |
|
39 |
+ [HOR_UP_PRED ] = "horizontal_up", |
|
40 |
+ [LEFT_DC_PRED ] = "left_dc", |
|
41 |
+ [TOP_DC_PRED ] = "top_dc", |
|
42 |
+ [DC_128_PRED ] = "dc_128", |
|
43 |
+ }, |
|
44 |
+ { /* VP8 */ |
|
45 |
+ [VERT_PRED ] = "vertical_vp8", |
|
46 |
+ [HOR_PRED ] = "horizontal_vp8", |
|
47 |
+ [VERT_LEFT_PRED] = "vertical_left_vp8", |
|
48 |
+ [TM_VP8_PRED ] = "tm_vp8", |
|
49 |
+ [DC_127_PRED ] = "dc_127_vp8", |
|
50 |
+ [DC_129_PRED ] = "dc_129_vp8", |
|
51 |
+ }, |
|
52 |
+ { /* RV40 */ |
|
53 |
+ [DIAG_DOWN_LEFT_PRED ] = "down_left_rv40", |
|
54 |
+ [VERT_LEFT_PRED ] = "vertical_left_rv40", |
|
55 |
+ [HOR_UP_PRED ] = "horizontal_up_rv40", |
|
56 |
+ [DIAG_DOWN_LEFT_PRED_RV40_NODOWN] = "down_left_nodown_rv40", |
|
57 |
+ [HOR_UP_PRED_RV40_NODOWN ] = "horizontal_up_nodown_rv40", |
|
58 |
+ [VERT_LEFT_PRED_RV40_NODOWN ] = "vertical_left_nodown_rv40", |
|
59 |
+ }, |
|
60 |
+ { /* SVQ3 */ |
|
61 |
+ [DIAG_DOWN_LEFT_PRED] = "down_left_svq3", |
|
62 |
+ }, |
|
63 |
+}; |
|
64 |
+ |
|
65 |
+static const char * const pred8x8_modes[4][11] = { |
|
66 |
+ { /* H264 */ |
|
67 |
+ [DC_PRED8x8 ] = "dc", |
|
68 |
+ [HOR_PRED8x8 ] = "horizontal", |
|
69 |
+ [VERT_PRED8x8 ] = "vertical", |
|
70 |
+ [PLANE_PRED8x8 ] = "plane", |
|
71 |
+ [LEFT_DC_PRED8x8 ] = "left_dc", |
|
72 |
+ [TOP_DC_PRED8x8 ] = "top_dc", |
|
73 |
+ [DC_128_PRED8x8 ] = "dc_128", |
|
74 |
+ [ALZHEIMER_DC_L0T_PRED8x8] = "mad_cow_dc_l0t", |
|
75 |
+ [ALZHEIMER_DC_0LT_PRED8x8] = "mad_cow_dc_0lt", |
|
76 |
+ [ALZHEIMER_DC_L00_PRED8x8] = "mad_cow_dc_l00", |
|
77 |
+ [ALZHEIMER_DC_0L0_PRED8x8] = "mad_cow_dc_0l0", |
|
78 |
+ }, |
|
79 |
+ { /* VP8 */ |
|
80 |
+ [PLANE_PRED8x8 ] = "tm_vp8", |
|
81 |
+ [DC_127_PRED8x8] = "dc_127_vp8", |
|
82 |
+ [DC_129_PRED8x8] = "dc_129_vp8", |
|
83 |
+ }, |
|
84 |
+ { /* RV40 */ |
|
85 |
+ [DC_PRED8x8 ] = "dc_rv40", |
|
86 |
+ [LEFT_DC_PRED8x8] = "left_dc_rv40", |
|
87 |
+ [TOP_DC_PRED8x8 ] = "top_dc_rv40", |
|
88 |
+ }, |
|
89 |
+ { /* SVQ3 */ |
|
90 |
+ }, |
|
91 |
+}; |
|
92 |
+ |
|
93 |
+static const char * const pred16x16_modes[4][9] = { |
|
94 |
+ { /* H264 */ |
|
95 |
+ [DC_PRED8x8 ] = "dc", |
|
96 |
+ [HOR_PRED8x8 ] = "horizontal", |
|
97 |
+ [VERT_PRED8x8 ] = "vertical", |
|
98 |
+ [PLANE_PRED8x8 ] = "plane", |
|
99 |
+ [LEFT_DC_PRED8x8] = "left_dc", |
|
100 |
+ [TOP_DC_PRED8x8 ] = "top_dc", |
|
101 |
+ [DC_128_PRED8x8 ] = "dc_128", |
|
102 |
+ }, |
|
103 |
+ { /* VP8 */ |
|
104 |
+ [PLANE_PRED8x8 ] = "tm_vp8", |
|
105 |
+ [DC_127_PRED8x8] = "dc_127_vp8", |
|
106 |
+ [DC_129_PRED8x8] = "dc_129_vp8", |
|
107 |
+ }, |
|
108 |
+ { /* RV40 */ |
|
109 |
+ [PLANE_PRED8x8] = "plane_rv40", |
|
110 |
+ }, |
|
111 |
+ { /* SVQ3 */ |
|
112 |
+ [PLANE_PRED8x8] = "plane_svq3", |
|
113 |
+ }, |
|
114 |
+}; |
|
115 |
+ |
|
116 |
+static const uint32_t pixel_mask[3] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff }; |
|
117 |
+ |
|
118 |
+#define SIZEOF_PIXEL ((bit_depth + 7) / 8) |
|
119 |
+#define BUF_SIZE (3*16*17) |
|
120 |
+ |
|
121 |
+#define check_pred_func(func, name, mode_name)\ |
|
122 |
+ (mode_name && ((codec_ids[codec] == AV_CODEC_ID_H264) ?\ |
|
123 |
+ check_func(func, "pred%s_%s_%d", name, mode_name, bit_depth) :\ |
|
124 |
+ check_func(func, "pred%s_%s", name, mode_name))) |
|
125 |
+ |
|
126 |
+#define randomize_buffers()\ |
|
127 |
+ do {\ |
|
128 |
+ uint32_t mask = pixel_mask[bit_depth-8];\ |
|
129 |
+ int i;\ |
|
130 |
+ for (i = 0; i < BUF_SIZE; i += 4) {\ |
|
131 |
+ uint32_t r = rnd() & mask;\ |
|
132 |
+ AV_WN32A(buf0+i, r);\ |
|
133 |
+ AV_WN32A(buf1+i, r);\ |
|
134 |
+ }\ |
|
135 |
+ } while (0) |
|
136 |
+ |
|
137 |
+#define src0 (buf0 + 4*16) /* Offset to allow room for top and left */ |
|
138 |
+#define src1 (buf1 + 4*16) |
|
139 |
+ |
|
140 |
+static void check_pred4x4(H264PredContext *h, uint8_t *buf0, uint8_t *buf1, |
|
141 |
+ int codec, int chroma_format, int bit_depth) |
|
142 |
+{ |
|
143 |
+ if (chroma_format == 1) { |
|
144 |
+ uint8_t *topright = buf0 + 2*16; |
|
145 |
+ int pred_mode; |
|
146 |
+ for (pred_mode = 0; pred_mode < 15; pred_mode++) { |
|
147 |
+ if (check_pred_func(h->pred4x4[pred_mode], "4x4", pred4x4_modes[codec][pred_mode])) { |
|
148 |
+ randomize_buffers(); |
|
149 |
+ call_ref(src0, topright, (ptrdiff_t)12*SIZEOF_PIXEL); |
|
150 |
+ call_new(src1, topright, (ptrdiff_t)12*SIZEOF_PIXEL); |
|
151 |
+ if (memcmp(buf0, buf1, BUF_SIZE)) |
|
152 |
+ fail(); |
|
153 |
+ bench_new(src1, topright, (ptrdiff_t)12*SIZEOF_PIXEL); |
|
154 |
+ } |
|
155 |
+ } |
|
156 |
+ } |
|
157 |
+} |
|
158 |
+ |
|
159 |
+static void check_pred8x8(H264PredContext *h, uint8_t *buf0, uint8_t *buf1, |
|
160 |
+ int codec, int chroma_format, int bit_depth) |
|
161 |
+{ |
|
162 |
+ int pred_mode; |
|
163 |
+ for (pred_mode = 0; pred_mode < 11; pred_mode++) { |
|
164 |
+ if (check_pred_func(h->pred8x8[pred_mode], (chroma_format == 2) ? "8x16" : "8x8", |
|
165 |
+ pred8x8_modes[codec][pred_mode])) { |
|
166 |
+ randomize_buffers(); |
|
167 |
+ call_ref(src0, (ptrdiff_t)24*SIZEOF_PIXEL); |
|
168 |
+ call_new(src1, (ptrdiff_t)24*SIZEOF_PIXEL); |
|
169 |
+ if (memcmp(buf0, buf1, BUF_SIZE)) |
|
170 |
+ fail(); |
|
171 |
+ bench_new(src1, (ptrdiff_t)24*SIZEOF_PIXEL); |
|
172 |
+ } |
|
173 |
+ } |
|
174 |
+} |
|
175 |
+ |
|
176 |
+static void check_pred16x16(H264PredContext *h, uint8_t *buf0, uint8_t *buf1, |
|
177 |
+ int codec, int chroma_format, int bit_depth) |
|
178 |
+{ |
|
179 |
+ if (chroma_format == 1) { |
|
180 |
+ int pred_mode; |
|
181 |
+ for (pred_mode = 0; pred_mode < 9; pred_mode++) { |
|
182 |
+ if (check_pred_func(h->pred16x16[pred_mode], "16x16", pred16x16_modes[codec][pred_mode])) { |
|
183 |
+ randomize_buffers(); |
|
184 |
+ call_ref(src0, (ptrdiff_t)48); |
|
185 |
+ call_new(src1, (ptrdiff_t)48); |
|
186 |
+ if (memcmp(buf0, buf1, BUF_SIZE)) |
|
187 |
+ fail(); |
|
188 |
+ bench_new(src1, (ptrdiff_t)48); |
|
189 |
+ } |
|
190 |
+ } |
|
191 |
+ } |
|
192 |
+} |
|
193 |
+ |
|
194 |
+static void check_pred8x8l(H264PredContext *h, uint8_t *buf0, uint8_t *buf1, |
|
195 |
+ int codec, int chroma_format, int bit_depth) |
|
196 |
+{ |
|
197 |
+ if (chroma_format == 1 && codec_ids[codec] == AV_CODEC_ID_H264) { |
|
198 |
+ int pred_mode; |
|
199 |
+ for (pred_mode = 0; pred_mode < 12; pred_mode++) { |
|
200 |
+ if (check_pred_func(h->pred8x8l[pred_mode], "8x8l", pred4x4_modes[codec][pred_mode])) { |
|
201 |
+ int neighbors; |
|
202 |
+ for (neighbors = 0; neighbors <= 0xc000; neighbors += 0x4000) { |
|
203 |
+ int has_topleft = neighbors & 0x8000; |
|
204 |
+ int has_topright = neighbors & 0x4000; |
|
205 |
+ |
|
206 |
+ if ((pred_mode == DIAG_DOWN_RIGHT_PRED || pred_mode == VERT_RIGHT_PRED) && !has_topleft) |
|
207 |
+ continue; /* Those aren't allowed according to the spec */ |
|
208 |
+ |
|
209 |
+ randomize_buffers(); |
|
210 |
+ call_ref(src0, has_topleft, has_topright, (ptrdiff_t)24*SIZEOF_PIXEL); |
|
211 |
+ call_new(src1, has_topleft, has_topright, (ptrdiff_t)24*SIZEOF_PIXEL); |
|
212 |
+ if (memcmp(buf0, buf1, BUF_SIZE)) |
|
213 |
+ fail(); |
|
214 |
+ bench_new(src1, has_topleft, has_topright, (ptrdiff_t)24*SIZEOF_PIXEL); |
|
215 |
+ } |
|
216 |
+ } |
|
217 |
+ } |
|
218 |
+ } |
|
219 |
+} |
|
220 |
+ |
|
221 |
+/* TODO: Add tests for H.264 lossless H/V prediction */ |
|
222 |
+ |
|
223 |
+void checkasm_check_h264pred(void) |
|
224 |
+{ |
|
225 |
+ static const struct { |
|
226 |
+ void (*func)(H264PredContext*, uint8_t*, uint8_t*, int, int, int); |
|
227 |
+ const char *name; |
|
228 |
+ } tests[] = { |
|
229 |
+ { check_pred4x4, "pred4x4" }, |
|
230 |
+ { check_pred8x8, "pred8x8" }, |
|
231 |
+ { check_pred16x16, "pred16x16" }, |
|
232 |
+ { check_pred8x8l, "pred8x8l" }, |
|
233 |
+ }; |
|
234 |
+ |
|
235 |
+ DECLARE_ALIGNED(16, uint8_t, buf0)[BUF_SIZE]; |
|
236 |
+ DECLARE_ALIGNED(16, uint8_t, buf1)[BUF_SIZE]; |
|
237 |
+ H264PredContext h; |
|
238 |
+ int test, codec, chroma_format, bit_depth; |
|
239 |
+ |
|
240 |
+ for (test = 0; test < FF_ARRAY_ELEMS(tests); test++) { |
|
241 |
+ for (codec = 0; codec < 4; codec++) { |
|
242 |
+ int codec_id = codec_ids[codec]; |
|
243 |
+ for (bit_depth = 8; bit_depth <= (codec_id == AV_CODEC_ID_H264 ? 10 : 8); bit_depth++) |
|
244 |
+ for (chroma_format = 1; chroma_format <= (codec_id == AV_CODEC_ID_H264 ? 2 : 1); chroma_format++) { |
|
245 |
+ ff_h264_pred_init(&h, codec_id, bit_depth, chroma_format); |
|
246 |
+ tests[test].func(&h, buf0, buf1, codec, chroma_format, bit_depth); |
|
247 |
+ } |
|
248 |
+ } |
|
249 |
+ report("%s", tests[test].name); |
|
250 |
+ } |
|
251 |
+} |
0 | 6 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,193 @@ |
0 |
+;***************************************************************************** |
|
1 |
+;* Assembly testing and benchmarking tool |
|
2 |
+;* Copyright (c) 2008 Loren Merritt |
|
3 |
+;* Copyright (c) 2012 Henrik Gramner |
|
4 |
+;* |
|
5 |
+;* This file is part of Libav. |
|
6 |
+;* |
|
7 |
+;* Libav is free software; you can redistribute it and/or modify |
|
8 |
+;* it under the terms of the GNU General Public License as published by |
|
9 |
+;* the Free Software Foundation; either version 2 of the License, or |
|
10 |
+;* (at your option) any later version. |
|
11 |
+;* |
|
12 |
+;* Libav is distributed in the hope that it will be useful, |
|
13 |
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
14 |
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
15 |
+;* GNU General Public License for more details. |
|
16 |
+;* |
|
17 |
+;* You should have received a copy of the GNU General Public License |
|
18 |
+;* along with this program; if not, write to the Free Software |
|
19 |
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. |
|
20 |
+;***************************************************************************** |
|
21 |
+ |
|
22 |
+%define private_prefix checkasm |
|
23 |
+%include "libavutil/x86/x86inc.asm" |
|
24 |
+ |
|
; Read-only data used by the checked_call routines further down in this file.
25 |
+SECTION_RODATA |
|
26 |
+ |
|
; Message handed to fail_func when a checked register comes back modified.
27 |
+error_message: db "failed to preserve register", 0 |
|
28 |
+ |
|
29 |
+%if ARCH_X86_64 |
|
; x6..x15: 128-bit patterns that checked_call loads into m6..m15 before the
;          call and xors against those registers afterwards (WIN64 only).
; n7..n14: 64-bit patterns used the same way for the general-purpose registers
;          r14 downwards (15-free_regs of them).
30 |
+; just random numbers to reduce the chance of incidental match |
|
; NOTE(review): the alignment is presumably required by the mova loads of
; x6..x15 in checked_call -- confirm against x86inc.
31 |
+ALIGN 16 |
|
32 |
+x6: dq 0x1a1b2550a612b48c,0x79445c159ce79064 |
|
33 |
+x7: dq 0x2eed899d5a28ddcd,0x86b2536fcd8cf636 |
|
34 |
+x8: dq 0xb0856806085e7943,0x3f2bf84fc0fcca4e |
|
35 |
+x9: dq 0xacbd382dcf5b8de2,0xd229e1f5b281303f |
|
36 |
+x10: dq 0x71aeaff20b095fd9,0xab63e2e11fa38ed9 |
|
37 |
+x11: dq 0x89b0c0765892729a,0x77d410d5c42c882d |
|
38 |
+x12: dq 0xc45ea11a955d8dd5,0x24b3c1d2a024048b |
|
39 |
+x13: dq 0x2e8ec680de14b47c,0xdd7b8919edd42786 |
|
40 |
+x14: dq 0x135ce6888fa02cbf,0x11e53e2b2ac655ef |
|
41 |
+x15: dq 0x011ff554472a7a10,0x6de8f4c914c334d5 |
|
42 |
+n7: dq 0x21f86d66c8ca00ce |
|
43 |
+n8: dq 0x75b6ba21077c48ad |
|
44 |
+n9: dq 0xed56bb2dcb3c7736 |
|
45 |
+n10: dq 0x8bda43d3fd1a7e06 |
|
46 |
+n11: dq 0xb64a9c9e5d318408 |
|
47 |
+n12: dq 0xdf9a54b303f1d3a3 |
|
48 |
+n13: dq 0x4a75479abd64e097 |
|
49 |
+n14: dq 0x249214109d5d1c88 |
|
50 |
+%endif |
|
51 |
+ |
|
52 |
+SECTION .text |
|
53 |
+ |
|
; fail_func is defined outside this file; the checked_call routines call it
; with error_message when a register check fails.
54 |
+cextern fail_func |
|
55 |
+ |
|
; max_args sizes the stack areas and the argument-copy loops of the routines
; below. The current value satisfies the constraint stated here: 15 % 4 == 3.
56 |
+; max number of args used by any asm function. |
|
57 |
+; (max_args % 4) must equal 3 for stack alignment |
|
58 |
+%define max_args 15 |
|
59 |
+ |
|
60 |
+%if ARCH_X86_64 |
|
61 |
+ |
|
; Overwrites the (max_args+6)*8 bytes just below the current stack pointer
; with the value of the first argument (r0), then restores the stack pointer.
; NOTE(review): the prototype below says "int", but nothing in this routine
; visibly produces a return value -- confirm against the C declaration.
62 |
+;----------------------------------------------------------------------------- |
|
63 |
+; int checkasm_stack_clobber(uint64_t clobber, ...) |
|
64 |
+;----------------------------------------------------------------------------- |
|
65 |
+cglobal stack_clobber, 1,2 |
|
66 |
+ ; Clobber the stack with junk below the stack pointer |
|
; size = number of bytes to overwrite: (max_args+6) slots of 8 bytes each.
67 |
+ %define size (max_args+6)*8 |
|
68 |
+ SUB rsp, size |
|
; r1 = byte offset of the highest slot; the loop walks it down in steps of 8.
69 |
+ mov r1, size-8 |
|
70 |
+.loop: |
|
71 |
+ mov [rsp+r1], r0 |
|
72 |
+ sub r1, 8 |
|
; Flags come from the sub: keep looping while r1 is still >= 0, so the slot
; at offset 0 is written as well.
73 |
+ jge .loop |
|
74 |
+ ADD rsp, size |
|
75 |
+ RET |
|
76 |
+ |
|
; free_regs controls how many general-purpose registers are seeded and
; verified around the call: 15-free_regs of them, counting down from r14
; (r14..r7 when WIN64 is set, r14..r9 otherwise).
77 |
+%if WIN64 |
|
78 |
+ %assign free_regs 7 |
|
79 |
+%else |
|
80 |
+ %assign free_regs 9 |
|
81 |
+%endif |
|
82 |
+ |
|
; x86-64 variant. Calls func with the arguments that follow it, after loading
; known patterns into the registers being checked. If any of them differs
; after the call, fail_func is invoked with error_message. The value func left
; in rax is handed back to the caller in rax.
83 |
+;----------------------------------------------------------------------------- |
|
84 |
+; intptr_t checkasm_checked_call(intptr_t (*func)(), ...) |
|
85 |
+;----------------------------------------------------------------------------- |
|
86 |
+INIT_XMM |
|
; NOTE(review): the cglobal operands presumably follow the x86inc convention
; (argument count, GPR count, XMM register count, stack bytes) -- confirm
; against x86inc.asm.
87 |
+cglobal checked_call, 2,15,16,max_args*8+8 |
|
; Keep the function pointer in r6; r0 is about to be reloaded with the
; callee's first argument.
88 |
+ mov r6, r0 |
|
89 |
+ |
|
90 |
+ ; All arguments have been pushed on the stack instead of registers in order to |
|
91 |
+ ; test for incorrect assumptions that 32-bit ints are zero-extended to 64-bit. |
|
; Load the callee's leading arguments from r6mp onwards: four here, plus two
; more in the UNIX64 branch. NOTE(review): rNmp is presumably x86inc's name
; for checked_call's own argument N -- confirm.
92 |
+ mov r0, r6mp |
|
93 |
+ mov r1, r7mp |
|
94 |
+ mov r2, r8mp |
|
95 |
+ mov r3, r9mp |
|
96 |
+%if UNIX64 |
|
97 |
+ mov r4, r10mp |
|
98 |
+ mov r5, r11mp |
|
; Copy the remaining max_args-6 arguments from the caller's frame into this
; function's own stack area, starting at [rsp]. i is the callee's argument
; index and r9 is used as scratch.
99 |
+ %assign i 6 |
|
100 |
+ %rep max_args-6 |
|
101 |
+ mov r9, [rsp+stack_offset+(i+1)*8] |
|
102 |
+ mov [rsp+(i-6)*8], r9 |
|
103 |
+ %assign i i+1 |
|
104 |
+ %endrep |
|
105 |
+%else |
|
; Non-UNIX64 branch: copy max_args-4 arguments, stored from slot 4 upwards.
; NOTE(review): slots 0..3 are left untouched, presumably as the Win64 shadow
; space -- confirm.
106 |
+ %assign i 4 |
|
107 |
+ %rep max_args-4 |
|
108 |
+ mov r9, [rsp+stack_offset+(i+7)*8] |
|
109 |
+ mov [rsp+i*8], r9 |
|
110 |
+ %assign i i+1 |
|
111 |
+ %endrep |
|
112 |
+%endif |
|
113 |
+ |
|
; WIN64 only: seed m6..m15 with the x6..x15 patterns.
114 |
+%if WIN64 |
|
115 |
+ %assign i 6 |
|
116 |
+ %rep 16-6 |
|
117 |
+ mova m %+ i, [x %+ i] |
|
118 |
+ %assign i i+1 |
|
119 |
+ %endrep |
|
120 |
+%endif |
|
121 |
+ |
|
; Seed the general-purpose registers to be checked with their n<i> patterns.
; This also replaces the scratch value left in r9 by the copy loop above.
122 |
+%assign i 14 |
|
123 |
+%rep 15-free_regs |
|
124 |
+ mov r %+ i, [n %+ i] |
|
125 |
+ %assign i i-1 |
|
126 |
+%endrep |
|
127 |
+ call r6 |
|
; After the call: xor every checked register with its pattern (zero if it is
; unchanged) and accumulate the results in r14.
128 |
+%assign i 14 |
|
129 |
+%rep 15-free_regs |
|
130 |
+ xor r %+ i, [n %+ i] |
|
131 |
+ or r14, r %+ i |
|
132 |
+ %assign i i-1 |
|
133 |
+%endrep |
|
134 |
+ |
|
; WIN64 only: the same check for m6..m15, accumulated in m6.
135 |
+%if WIN64 |
|
136 |
+ %assign i 6 |
|
137 |
+ %rep 16-6 |
|
138 |
+ pxor m %+ i, [x %+ i] |
|
139 |
+ por m6, m %+ i |
|
140 |
+ %assign i i+1 |
|
141 |
+ %endrep |
|
; Narrow the accumulator so movq can move it into a GPR. With signed
; saturation a non-zero word cannot turn into a zero byte, so no mismatch
; is lost.
142 |
+ packsswb m6, m6 |
|
143 |
+ movq r5, m6 |
|
144 |
+ or r14, r5 |
|
145 |
+%endif |
|
146 |
+ |
|
; The flags come from the last "or r14, ..." above: zero means every checked
; register still held its pattern.
147 |
+ jz .ok |
|
; Failure path: keep func's return value in r9 across the fail_func call.
; NOTE(review): this relies on r9 surviving the call -- presumably it maps to
; a callee-saved register under x86inc; confirm.
148 |
+ mov r9, rax |
|
149 |
+ lea r0, [error_message] |
|
150 |
+ call fail_func |
|
151 |
+ mov rax, r9 |
|
152 |
+.ok: |
|
153 |
+ RET |
|
154 |
+ |
|
155 |
+%else |
|
156 |
+ |
|
; n3..n6: 32-bit patterns loaded into r3..r6 before the call and xored against
; those registers afterwards.
157 |
+; just random numbers to reduce the chance of incidental match |
|
158 |
+%define n3 dword 0x6549315c |
|
159 |
+%define n4 dword 0xe02f3e23 |
|
160 |
+%define n5 dword 0xb78d0d1d |
|
161 |
+%define n6 dword 0x33627ba7 |
|
162 |
+ |
|
; 32-bit variant. Seeds r3..r6, pushes a copy of max_args stack arguments for
; the callee, calls func, and invokes fail_func with error_message if any of
; r3..r6 changed. The value func left in eax is handed back to the caller.
163 |
+;----------------------------------------------------------------------------- |
|
164 |
+; intptr_t checkasm_checked_call(intptr_t (*func)(), ...) |
|
165 |
+;----------------------------------------------------------------------------- |
|
166 |
+cglobal checked_call, 1,7 |
|
167 |
+ mov r3, n3 |
|
168 |
+ mov r4, n4 |
|
169 |
+ mov r5, n5 |
|
170 |
+ mov r6, n6 |
|
; Every PUSH lowers esp by 4, so the same operand expression reads a
; different argument on each repetition, walking from the last one towards
; the first.
; NOTE(review): the constant 20 presumably covers the return address plus the
; registers saved by the cglobal prologue, so that the first read hits the
; last of the max_args arguments following func -- confirm.
171 |
+%rep max_args |
|
172 |
+ PUSH dword [esp+20+max_args*4] |
|
173 |
+%endrep |
|
174 |
+ call r0 |
|
; xor leaves zero in every register that still holds its pattern; the three
; "or" instructions fold the four results into r3.
175 |
+ xor r3, n3 |
|
176 |
+ xor r4, n4 |
|
177 |
+ xor r5, n5 |
|
178 |
+ xor r6, n6 |
|
179 |
+ or r3, r4 |
|
180 |
+ or r5, r6 |
|
181 |
+ or r3, r5 |
|
; Flags from "or r3, r5": zero means r3..r6 were all preserved.
182 |
+ jz .ok |
|
; Failure path: stash func's return value in r3 across the fail_func call.
; NOTE(review): relies on r3 surviving the call -- confirm it maps to a
; callee-saved register.
183 |
+ mov r3, eax |
|
184 |
+ lea r0, [error_message] |
|
; Store the message pointer at the top of the stack, overwriting the first
; copied argument, so that it becomes fail_func's stack argument.
185 |
+ mov [esp], r0 |
|
186 |
+ call fail_func |
|
187 |
+ mov eax, r3 |
|
188 |
+.ok: |
|
; Drop the max_args argument copies pushed above.
189 |
+ add esp, max_args*4 |
|
190 |
+ REP_RET |
|
191 |
+ |
|
192 |
+%endif ; ARCH_X86_64 |