Browse code

Merge commit '8bc67ec2c0d2b5444d51a1bed1d50f0e10d92717'

* commit '8bc67ec2c0d2b5444d51a1bed1d50f0e10d92717':
Checkasm: assembly testing and benchmarking tool

Merged-by: Michael Niedermayer <michael@niedermayer.cc>

Michael Niedermayer authored on 2015/07/13 04:00:50
Showing 8 changed files
... ...
@@ -63,6 +63,7 @@
63 63
 /libavutil/ffversion.h
64 64
 /tests/audiogen
65 65
 /tests/base64
66
+/tests/checkasm/checkasm
66 67
 /tests/data/
67 68
 /tests/pixfmts.mak
68 69
 /tests/rotozoom
... ...
@@ -230,5 +230,7 @@ testclean::
230 230
 
231 231
 -include $(wildcard tests/*.d)
232 232
 
233
+include $(SRC_PATH)/tests/checkasm/Makefile
234
+
233 235
 .PHONY: fate* lcov lcov-reset
234 236
 .INTERMEDIATE: coverage.info
235 237
new file mode 100644
... ...
@@ -0,0 +1,33 @@
0
+# libavcodec tests
1
+AVCODECOBJS-$(CONFIG_H264PRED) += h264pred.o
2
+
3
+CHECKASMOBJS-$(CONFIG_AVCODEC) += $(AVCODECOBJS-yes)
4
+
5
+
6
+-include $(SRC_PATH)/tests/checkasm/$(ARCH)/Makefile
7
+
8
+CHECKASMOBJS += $(CHECKASMOBJS-yes) checkasm.o
9
+CHECKASMOBJS := $(sort $(CHECKASMOBJS:%=tests/checkasm/%))
10
+
11
+-include $(CHECKASMOBJS:.o=.d)
12
+
13
+CHECKASMDIRS := $(sort $(dir $(CHECKASMOBJS)))
14
+$(CHECKASMOBJS): | $(CHECKASMDIRS)
15
+OBJDIRS += $(CHECKASMDIRS)
16
+
17
+# We rely on function pointers intentionally declared without specified argument types.
18
+tests/checkasm/%.o: CFLAGS := $(CFLAGS:-Wstrict-prototypes=-Wno-strict-prototypes)
19
+
20
+CHECKASM := tests/checkasm/checkasm$(EXESUF)
21
+
22
+$(CHECKASM): $(EXEOBJS) $(CHECKASMOBJS) $(FF_DEP_LIBS)
23
+	$(LD) $(LDFLAGS) $(LDEXEFLAGS) $(LD_O) $(CHECKASMOBJS) $(FF_EXTRALIBS)
24
+
25
+checkasm: $(CHECKASM)
26
+
27
+clean:: checkasmclean
28
+
29
# Remove the checkasm binary and the intermediate files of its objects,
# including those built from the architecture-specific subdirectory.
checkasmclean:
	$(RM) $(CHECKASM) $(CLEANSUFFIXES:%=tests/checkasm/%) $(CLEANSUFFIXES:%=tests/checkasm/$(ARCH)/%)

# Neither target produces a file of its own name; declare both phony so that a
# stray file called "checkasm" or "checkasmclean" cannot suppress the rule.
.PHONY: checkasm checkasmclean
0 33
new file mode 100644
... ...
@@ -0,0 +1,484 @@
0
+/*
1
+ * Assembly testing and benchmarking tool
2
+ * Copyright (c) 2015 Henrik Gramner
3
+ * Copyright (c) 2008 Loren Merritt
4
+ *
5
+ * This file is part of FFmpeg.
6
+ *
7
+ * FFmpeg is free software; you can redistribute it and/or modify
8
+ * it under the terms of the GNU General Public License as published by
9
+ * the Free Software Foundation; either version 2 of the License, or
10
+ * (at your option) any later version.
11
+ *
12
+ * FFmpeg is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
+ * GNU General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU General Public License along
18
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
19
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20
+ */
21
+
22
+#include <stdarg.h>
23
+#include <stdio.h>
24
+#include <stdlib.h>
25
+#include <string.h>
26
+#include "checkasm.h"
27
+#include "libavutil/common.h"
28
+#include "libavutil/cpu.h"
29
+#include "libavutil/random_seed.h"
30
+
31
+#if ARCH_X86
32
+#include "libavutil/x86/cpu.h"
33
+#endif
34
+
35
+#if HAVE_SETCONSOLETEXTATTRIBUTE
36
+#include <windows.h>
37
+#define COLOR_RED    FOREGROUND_RED
38
+#define COLOR_GREEN  FOREGROUND_GREEN
39
+#define COLOR_YELLOW (FOREGROUND_RED|FOREGROUND_GREEN)
40
+#else
41
+#define COLOR_RED    1
42
+#define COLOR_GREEN  2
43
+#define COLOR_YELLOW 3
44
+#endif
45
+
46
+#if HAVE_UNISTD_H
47
+#include <unistd.h>
48
+#endif
49
+
50
+#if !HAVE_ISATTY
51
+#define isatty(fd) 1
52
+#endif
53
+
54
+/* List of tests to invoke */
55
+static void (* const tests[])(void) = {
56
+#if CONFIG_H264PRED
57
+    checkasm_check_h264pred,
58
+#endif
59
+    NULL
60
+};
61
+
62
+/* List of cpu flags to check */
63
+static const struct {
64
+    const char *name;
65
+    const char *suffix;
66
+    int flag;
67
+} cpus[] = {
68
+#if ARCH_X86
69
+    { "MMX",      "mmx",      AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV },
70
+    { "MMXEXT",   "mmxext",   AV_CPU_FLAG_MMXEXT },
71
+    { "3DNOW",    "3dnow",    AV_CPU_FLAG_3DNOW },
72
+    { "3DNOWEXT", "3dnowext", AV_CPU_FLAG_3DNOWEXT },
73
+    { "SSE",      "sse",      AV_CPU_FLAG_SSE },
74
+    { "SSE2",     "sse2",     AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW },
75
+    { "SSE3",     "sse3",     AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW },
76
+    { "SSSE3",    "ssse3",    AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM },
77
+    { "SSE4.1",   "sse4",     AV_CPU_FLAG_SSE4 },
78
+    { "SSE4.2",   "sse42",    AV_CPU_FLAG_SSE42 },
79
+    { "AVX",      "avx",      AV_CPU_FLAG_AVX },
80
+    { "XOP",      "xop",      AV_CPU_FLAG_XOP },
81
+    { "FMA3",     "fma3",     AV_CPU_FLAG_FMA3 },
82
+    { "FMA4",     "fma4",     AV_CPU_FLAG_FMA4 },
83
+    { "AVX2",     "avx2",     AV_CPU_FLAG_AVX2 },
84
+#endif
85
+    { NULL }
86
+};
87
+
88
+typedef struct CheckasmFuncVersion {
89
+    struct CheckasmFuncVersion *next;
90
+    intptr_t (*func)();
91
+    int ok;
92
+    int cpu;
93
+    int iterations;
94
+    uint64_t cycles;
95
+} CheckasmFuncVersion;
96
+
97
+/* Binary search tree node */
98
+typedef struct CheckasmFunc {
99
+    struct CheckasmFunc *child[2];
100
+    CheckasmFuncVersion versions;
101
+    char name[1];
102
+} CheckasmFunc;
103
+
104
+/* Internal state */
105
+static struct {
106
+    CheckasmFunc *funcs;
107
+    CheckasmFunc *current_func;
108
+    CheckasmFuncVersion *current_func_ver;
109
+    const char *bench_pattern;
110
+    int bench_pattern_len;
111
+    int num_checked;
112
+    int num_failed;
113
+    int nop_time;
114
+    int cpu_flag;
115
+    const char *cpu_flag_name;
116
+} state;
117
+
118
+/* PRNG state */
119
+AVLFG checkasm_lfg;
120
+
121
/*
 * Print colored text to stderr if the terminal supports it.
 *
 * color: one of the COLOR_* values defined at the top of this file.
 * fmt:   printf-style format string, followed by its arguments.
 *
 * Whether color can be used is determined on the first call and cached in
 * the function-local static use_color (-1 = not yet probed).
 */
static void color_printf(int color, const char *fmt, ...)
{
    static int use_color = -1;
    va_list arg;

#if HAVE_SETCONSOLETEXTATTRIBUTE
    static HANDLE con;
    static WORD org_attributes;

    if (use_color < 0) {
        CONSOLE_SCREEN_BUFFER_INFO con_info;
        con = GetStdHandle(STD_ERROR_HANDLE);
        /* Remember the original attributes so they can be restored after printing */
        if (con && con != INVALID_HANDLE_VALUE && GetConsoleScreenBufferInfo(con, &con_info)) {
            org_attributes = con_info.wAttributes;
            use_color = 1;
        } else
            use_color = 0;
    }
    if (use_color)
        /* Replace only the low 4 attribute bits with the requested color */
        SetConsoleTextAttribute(con, (org_attributes & 0xfff0) | (color & 0x0f));
#else
    if (use_color < 0) {
        /* Use color only if TERM is set, is not "dumb", and fd 2 is a tty */
        const char *term = getenv("TERM");
        use_color = term && strcmp(term, "dumb") && isatty(2);
    }
    if (use_color)
        /* Escape sequence: bit 3 of color goes in the first field, bits 0-2 select the 3x color */
        fprintf(stderr, "\x1b[%d;3%dm", (color & 0x08) >> 3, color & 0x07);
#endif

    va_start(arg, fmt);
    vfprintf(stderr, fmt, arg);
    va_end(arg);

    /* Restore the previous text attributes */
    if (use_color) {
#if HAVE_SETCONSOLETEXTATTRIBUTE
        SetConsoleTextAttribute(con, org_attributes);
#else
        fprintf(stderr, "\x1b[0m");
#endif
    }
}
163
+
164
+/* Deallocate a tree */
165
+static void destroy_func_tree(CheckasmFunc *f)
166
+{
167
+    if (f) {
168
+        CheckasmFuncVersion *v = f->versions.next;
169
+        while (v) {
170
+            CheckasmFuncVersion *next = v->next;
171
+            free(v);
172
+            v = next;
173
+        }
174
+
175
+        destroy_func_tree(f->child[0]);
176
+        destroy_func_tree(f->child[1]);
177
+        free(f);
178
+    }
179
+}
180
+
181
+/* Allocate a zero-initialized block, clean up and exit on failure */
182
+static void *checkasm_malloc(size_t size)
183
+{
184
+    void *ptr = calloc(1, size);
185
+    if (!ptr) {
186
+        fprintf(stderr, "checkasm: malloc failed\n");
187
+        destroy_func_tree(state.funcs);
188
+        exit(1);
189
+    }
190
+    return ptr;
191
+}
192
+
193
+/* Get the suffix of the specified cpu flag */
194
+static const char *cpu_suffix(int cpu)
195
+{
196
+    int i = FF_ARRAY_ELEMS(cpus);
197
+
198
+    while (--i >= 0)
199
+        if (cpu & cpus[i].flag)
200
+            return cpus[i].suffix;
201
+
202
+    return "c";
203
+}
204
+
205
+#ifdef AV_READ_TIME
206
/* qsort() comparator ordering uint16_t samples in ascending order */
static int cmp_nop(const void *a, const void *b)
{
    const uint16_t lhs = *(const uint16_t *)a;
    const uint16_t rhs = *(const uint16_t *)b;

    return lhs - rhs;
}
210
+
211
+/* Measure the overhead of the timing code (in decicycles) */
212
+static int measure_nop_time(void)
213
+{
214
+    uint16_t nops[10000];
215
+    int i, nop_sum = 0;
216
+
217
+    for (i = 0; i < 10000; i++) {
218
+        uint64_t t = AV_READ_TIME();
219
+        nops[i] = AV_READ_TIME() - t;
220
+    }
221
+
222
+    qsort(nops, 10000, sizeof(uint16_t), cmp_nop);
223
+    for (i = 2500; i < 7500; i++)
224
+        nop_sum += nops[i];
225
+
226
+    return nop_sum / 500;
227
+}
228
+
229
+/* Print benchmark results */
230
+static void print_benchs(CheckasmFunc *f)
231
+{
232
+    if (f) {
233
+        print_benchs(f->child[0]);
234
+
235
+        /* Only print functions with at least one assembly version */
236
+        if (f->versions.cpu || f->versions.next) {
237
+            CheckasmFuncVersion *v = &f->versions;
238
+            do {
239
+                if (v->iterations) {
240
+                    int decicycles = (10*v->cycles/v->iterations - state.nop_time) / 4;
241
+                    printf("%s_%s: %d.%d\n", f->name, cpu_suffix(v->cpu), decicycles/10, decicycles%10);
242
+                }
243
+            } while ((v = v->next));
244
+        }
245
+
246
+        print_benchs(f->child[1]);
247
+    }
248
+}
249
+#endif
250
+
251
/* Compare two names byte-wise, except that where they first differ inside a
 * run of digits the name with the longer remaining digit run sorts later
 * (so e.g. "9" orders before "10"). Returns <0, 0 or >0. */
static int cmp_func_names(const char *a, const char *b)
{
    int ascii_diff, digit_diff;

    /* Advance to the first differing character (or the common terminator) */
    while (!(ascii_diff = *a - *b) && *a) {
        a++;
        b++;
    }

    /* Skip digits both names still have in parallel */
    while (av_isdigit(*a) && av_isdigit(*b)) {
        a++;
        b++;
    }

    /* If only one name still has a digit here, its number is longer */
    digit_diff = av_isdigit(*a) - av_isdigit(*b);
    if (digit_diff)
        return digit_diff;

    return ascii_diff;
}
261
+
262
+/* Get a node with the specified name, creating it if it doesn't exist */
263
+static CheckasmFunc *get_func(const char *name, int length)
264
+{
265
+    CheckasmFunc *f, **f_ptr = &state.funcs;
266
+
267
+    /* Search the tree for a matching node */
268
+    while ((f = *f_ptr)) {
269
+        int cmp = cmp_func_names(name, f->name);
270
+        if (!cmp)
271
+            return f;
272
+
273
+        f_ptr = &f->child[(cmp > 0)];
274
+    }
275
+
276
+    /* Allocate and insert a new node into the tree */
277
+    f = *f_ptr = checkasm_malloc(sizeof(CheckasmFunc) + length);
278
+    memcpy(f->name, name, length+1);
279
+
280
+    return f;
281
+}
282
+
283
+/* Perform tests and benchmarks for the specified cpu flag if supported by the host */
284
+static void check_cpu_flag(const char *name, int flag)
285
+{
286
+    int old_cpu_flag = state.cpu_flag;
287
+
288
+    flag |= old_cpu_flag;
289
+    av_set_cpu_flags_mask(flag);
290
+    state.cpu_flag = av_get_cpu_flags();
291
+
292
+    if (!flag || state.cpu_flag != old_cpu_flag) {
293
+        int i;
294
+
295
+        state.cpu_flag_name = name;
296
+        for (i = 0; tests[i]; i++)
297
+            tests[i]();
298
+    }
299
+}
300
+
301
+/* Print the name of the current CPU flag, but only do it once */
302
+static void print_cpu_name(void)
303
+{
304
+    if (state.cpu_flag_name) {
305
+        color_printf(COLOR_YELLOW, "%s:\n", state.cpu_flag_name);
306
+        state.cpu_flag_name = NULL;
307
+    }
308
+}
309
+
310
+int main(int argc, char *argv[])
311
+{
312
+    int i, seed, ret = 0;
313
+
314
+    if (!tests[0] || !cpus[0].flag) {
315
+        fprintf(stderr, "checkasm: no tests to perform\n");
316
+        return 1;
317
+    }
318
+
319
+    if (argc > 1 && !strncmp(argv[1], "--bench", 7)) {
320
+#ifndef AV_READ_TIME
321
+        fprintf(stderr, "checkasm: --bench is not supported on your system\n");
322
+        return 1;
323
+#endif
324
+        if (argv[1][7] == '=') {
325
+            state.bench_pattern = argv[1] + 8;
326
+            state.bench_pattern_len = strlen(state.bench_pattern);
327
+        } else
328
+            state.bench_pattern = "";
329
+
330
+        argc--;
331
+        argv++;
332
+    }
333
+
334
+    seed = (argc > 1) ? atoi(argv[1]) : av_get_random_seed();
335
+    fprintf(stderr, "checkasm: using random seed %u\n", seed);
336
+    av_lfg_init(&checkasm_lfg, seed);
337
+
338
+    check_cpu_flag(NULL, 0);
339
+    for (i = 0; cpus[i].flag; i++)
340
+        check_cpu_flag(cpus[i].name, cpus[i].flag);
341
+
342
+    if (state.num_failed) {
343
+        fprintf(stderr, "checkasm: %d of %d tests have failed\n", state.num_failed, state.num_checked);
344
+        ret = 1;
345
+    } else {
346
+        fprintf(stderr, "checkasm: all %d tests passed\n", state.num_checked);
347
+#ifdef AV_READ_TIME
348
+        if (state.bench_pattern) {
349
+            state.nop_time = measure_nop_time();
350
+            printf("nop: %d.%d\n", state.nop_time/10, state.nop_time%10);
351
+            print_benchs(state.funcs);
352
+        }
353
+#endif
354
+    }
355
+
356
+    destroy_func_tree(state.funcs);
357
+    return ret;
358
+}
359
+
360
/* Decide whether or not the specified function needs to be tested and
 * allocate/initialize data structures if needed. Returns a pointer to a
 * reference function if the function should be tested, otherwise NULL.
 *
 * func: implementation currently selected for the active cpu flags.
 * name: printf-style format (plus arguments) producing the function name;
 *       the formatted name must be non-empty and shorter than 256 bytes,
 *       otherwise NULL is returned.
 *
 * The returned reference is the most recently registered version that is
 * still marked ok, or func itself if there is none. */
intptr_t (*checkasm_check_func(intptr_t (*func)(), const char *name, ...))()
{
    char name_buf[256];
    intptr_t (*ref)() = func;
    CheckasmFuncVersion *v;
    int name_length;
    va_list arg;

    va_start(arg, name);
    name_length = vsnprintf(name_buf, sizeof(name_buf), name, arg);
    va_end(arg);

    /* Reject missing functions, formatting errors, empty and truncated names */
    if (!func || name_length <= 0 || name_length >= sizeof(name_buf))
        return NULL;

    state.current_func = get_func(name_buf, name_length);
    v = &state.current_func->versions;

    /* A non-NULL func in the embedded first entry means the node already has
     * at least one registered version */
    if (v->func) {
        CheckasmFuncVersion *prev;
        do {
            /* Only test functions that haven't already been tested */
            if (v->func == func)
                return NULL;

            /* Later ok versions override earlier ones as the reference */
            if (v->ok)
                ref = v->func;

            prev = v;
        } while ((v = v->next));

        /* Append a new zero-initialized entry at the end of the list */
        v = prev->next = checkasm_malloc(sizeof(CheckasmFuncVersion));
    }

    v->func = func;
    v->ok = 1;
    v->cpu = state.cpu_flag;
    state.current_func_ver = v;

    /* Versions registered while no cpu flag is set are not counted as checks */
    if (state.cpu_flag)
        state.num_checked++;

    return ref;
}
407
+
408
+/* Decide whether or not the current function needs to be benchmarked */
409
+int checkasm_bench_func(void)
410
+{
411
+    return !state.num_failed && state.bench_pattern &&
412
+           !strncmp(state.current_func->name, state.bench_pattern, state.bench_pattern_len);
413
+}
414
+
415
+/* Indicate that the current test has failed */
416
+void checkasm_fail_func(const char *msg, ...)
417
+{
418
+    if (state.current_func_ver->cpu && state.current_func_ver->ok) {
419
+        va_list arg;
420
+
421
+        print_cpu_name();
422
+        fprintf(stderr, "   %s_%s (", state.current_func->name, cpu_suffix(state.current_func_ver->cpu));
423
+        va_start(arg, msg);
424
+        vfprintf(stderr, msg, arg);
425
+        va_end(arg);
426
+        fprintf(stderr, ")\n");
427
+
428
+        state.current_func_ver->ok = 0;
429
+        state.num_failed++;
430
+    }
431
+}
432
+
433
+/* Update benchmark results of the current function */
434
+void checkasm_update_bench(int iterations, uint64_t cycles)
435
+{
436
+    state.current_func_ver->iterations += iterations;
437
+    state.current_func_ver->cycles += cycles;
438
+}
439
+
440
/* Print the outcome of all tests performed since the last time this function was called.
 *
 * name: printf-style format (plus arguments) naming the group of tests; if it
 *       formats from an empty string, the name of the current function is
 *       used instead.
 *
 * The function keeps state across calls: the check/failure counters seen at
 * the previous report, and the longest name seen so far, which is used to
 * align the "[OK]"/"[FAILED]" column. */
void checkasm_report(const char *name, ...)
{
    static int prev_checked, prev_failed, max_length;

    /* New checks were counted since the last report: print a result line */
    if (state.num_checked > prev_checked) {
        print_cpu_name();

        if (*name) {
            int pad_length = max_length;
            va_list arg;

            fprintf(stderr, " - ");
            va_start(arg, name);
            /* vfprintf returns the number of characters written */
            pad_length -= vfprintf(stderr, name, arg);
            va_end(arg);
            /* Right-align '[' in a field that supplies the remaining padding */
            fprintf(stderr, "%*c", FFMAX(pad_length, 0) + 2, '[');
        } else
            fprintf(stderr, " - %-*s [", max_length, state.current_func->name);

        /* Any failure recorded since the previous report marks the group as failed */
        if (state.num_failed == prev_failed)
            color_printf(COLOR_GREEN, "OK");
        else
            color_printf(COLOR_RED, "FAILED");
        fprintf(stderr, "]\n");

        prev_checked = state.num_checked;
        prev_failed  = state.num_failed;
    } else if (!state.cpu_flag) {
        /* Nothing new was counted and no cpu flags are active: only measure the name */
        int length;

        /* Calculate the amount of padding required to make the output vertically aligned */
        if (*name) {
            va_list arg;
            va_start(arg, name);
            /* With a NULL buffer of size 0, vsnprintf only computes the length */
            length = vsnprintf(NULL, 0, name, arg);
            va_end(arg);
        } else
            length = strlen(state.current_func->name);

        if (length > max_length)
            max_length = length;
    }
}
0 484
new file mode 100644
... ...
@@ -0,0 +1,115 @@
0
+/*
1
+ * Assembly testing and benchmarking tool
2
+ * Copyright (c) 2015 Henrik Gramner
3
+ * Copyright (c) 2008 Loren Merritt
4
+ *
5
+ * This file is part of FFmpeg.
6
+ *
7
+ * FFmpeg is free software; you can redistribute it and/or modify
8
+ * it under the terms of the GNU General Public License as published by
9
+ * the Free Software Foundation; either version 2 of the License, or
10
+ * (at your option) any later version.
11
+ *
12
+ * FFmpeg is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
+ * GNU General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU General Public License along
18
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
19
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20
+ */
21
+
22
+#ifndef CHECKASM_H
23
+#define CHECKASM_H
24
+
25
+#include <stdint.h>
26
+#include "config.h"
27
+#include "libavutil/avstring.h"
28
+#include "libavutil/lfg.h"
29
+#include "libavutil/timer.h"
30
+
31
+void checkasm_check_h264pred(void);
32
+
33
+intptr_t (*checkasm_check_func(intptr_t (*func)(), const char *name, ...))() av_printf_format(2, 3);
34
+int checkasm_bench_func(void);
35
+void checkasm_fail_func(const char *msg, ...) av_printf_format(1, 2);
36
+void checkasm_update_bench(int iterations, uint64_t cycles);
37
+void checkasm_report(const char *name, ...) av_printf_format(1, 2);
38
+
39
+extern AVLFG checkasm_lfg;
40
+#define rnd() av_lfg_get(&checkasm_lfg)
41
+
42
+static av_unused intptr_t (*func_ref)();
43
+static av_unused intptr_t (*func_new)();
44
+
45
+#define BENCH_RUNS 1000 /* Trade-off between accuracy and speed */
46
+
47
+/* Decide whether or not the specified function needs to be tested */
48
+#define check_func(func, ...) ((func_new = (intptr_t (*)())func) &&\
49
+                              (func_ref = checkasm_check_func(func_new, __VA_ARGS__)))
50
+
51
+/* Indicate that the current test has failed */
52
+#define fail() checkasm_fail_func("%s:%d", av_basename(__FILE__), __LINE__)
53
+
54
+/* Print the test outcome */
55
+#define report(...) checkasm_report("" __VA_ARGS__)
56
+
57
+/* Call the reference function */
58
+#define call_ref(...) func_ref(__VA_ARGS__)
59
+
60
+#if ARCH_X86 && HAVE_YASM
61
+/* Verifies that clobbered callee-saved registers are properly saved and restored */
62
+intptr_t checkasm_checked_call(intptr_t (*func)(), ...);
63
+#endif
64
+
65
+/* Call the function */
66
+#if ARCH_X86_64 && HAVE_YASM
67
+/* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
68
+ * This is done by clobbering the stack with junk around the stack pointer and calling the
69
+ * assembly function through checkasm_checked_call with added dummy arguments which forces all
70
+ * real arguments to be passed on the stack and not in registers. For 32-bit arguments the
71
+ * upper half of the 64-bit register locations on the stack will now contain junk which will
72
+ * cause misbehaving functions to either produce incorrect output or segfault. Note that
73
+ * even though this works extremely well in practice, it's technically not guaranteed
74
+ * and false negatives are theoretically possible, but there can never be any false positives.
75
+ */
76
+void checkasm_stack_clobber(uint64_t clobber, ...);
77
+#define CLOB (UINT64_C(0xdeadbeefdeadbeef))
78
+#define call_new(...) (checkasm_stack_clobber(CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,\
79
+                                              CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB),\
80
+                      checkasm_checked_call(func_new, 0, 0, 0, 0, 0, __VA_ARGS__))
81
+#elif ARCH_X86_32 && HAVE_YASM
82
+#define call_new(...) checkasm_checked_call(func_new, __VA_ARGS__)
83
+#else
84
+#define call_new(...) func_new(__VA_ARGS__)
85
+#endif
86
+
87
+/* Benchmark the function */
88
+#ifdef AV_READ_TIME
89
+#define bench_new(...)\
90
+    do {\
91
+        if (checkasm_bench_func()) {\
92
+            intptr_t (*tfunc)() = func_new;\
93
+            uint64_t tsum = 0;\
94
+            int ti, tcount = 0;\
95
+            for (ti = 0; ti < BENCH_RUNS; ti++) {\
96
+                uint64_t t = AV_READ_TIME();\
97
+                tfunc(__VA_ARGS__);\
98
+                tfunc(__VA_ARGS__);\
99
+                tfunc(__VA_ARGS__);\
100
+                tfunc(__VA_ARGS__);\
101
+                t = AV_READ_TIME() - t;\
102
+                if (t*tcount <= tsum*4 && ti > 0) {\
103
+                    tsum += t;\
104
+                    tcount++;\
105
+                }\
106
+            }\
107
+            checkasm_update_bench(tcount, tsum);\
108
+        }\
109
+    } while (0)
110
+#else
111
+#define bench_new(...)
112
+#endif
113
+
114
+#endif
0 115
new file mode 100644
... ...
@@ -0,0 +1,252 @@
0
+/*
1
+ * Copyright (c) 2015 Henrik Gramner
2
+ *
3
+ * This file is part of FFmpeg.
4
+ *
5
+ * FFmpeg is free software; you can redistribute it and/or modify
6
+ * it under the terms of the GNU General Public License as published by
7
+ * the Free Software Foundation; either version 2 of the License, or
8
+ * (at your option) any later version.
9
+ *
10
+ * FFmpeg is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
+ * GNU General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU General Public License along
16
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
17
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18
+ */
19
+
20
+#include <string.h>
21
+#include "checkasm.h"
22
+#include "libavcodec/avcodec.h"
23
+#include "libavcodec/h264pred.h"
24
+#include "libavutil/common.h"
25
+#include "libavutil/intreadwrite.h"
26
+
27
+static const int codec_ids[4] = { AV_CODEC_ID_H264, AV_CODEC_ID_VP8, AV_CODEC_ID_RV40, AV_CODEC_ID_SVQ3 };
28
+
29
+static const char * const pred4x4_modes[4][15] = {
30
+    { /* H264 */
31
+        [VERT_PRED           ] = "vertical",
32
+        [HOR_PRED            ] = "horizontal",
33
+        [DC_PRED             ] = "dc",
34
+        [DIAG_DOWN_LEFT_PRED ] = "down_left",
35
+        [DIAG_DOWN_RIGHT_PRED] = "down_right",
36
+        [VERT_RIGHT_PRED     ] = "vertical_right",
37
+        [HOR_DOWN_PRED       ] = "horizontal_right",
38
+        [VERT_LEFT_PRED      ] = "vertical_left",
39
+        [HOR_UP_PRED         ] = "horizontal_up",
40
+        [LEFT_DC_PRED        ] = "left_dc",
41
+        [TOP_DC_PRED         ] = "top_dc",
42
+        [DC_128_PRED         ] = "dc_128",
43
+    },
44
+    { /* VP8 */
45
+        [VERT_PRED     ] = "vertical_vp8",
46
+        [HOR_PRED      ] = "horizontal_vp8",
47
+        [VERT_LEFT_PRED] = "vertical_left_vp8",
48
+        [TM_VP8_PRED   ] = "tm_vp8",
49
+        [DC_127_PRED   ] = "dc_127_vp8",
50
+        [DC_129_PRED   ] = "dc_129_vp8",
51
+    },
52
+    { /* RV40 */
53
+        [DIAG_DOWN_LEFT_PRED            ] = "down_left_rv40",
54
+        [VERT_LEFT_PRED                 ] = "vertical_left_rv40",
55
+        [HOR_UP_PRED                    ] = "horizontal_up_rv40",
56
+        [DIAG_DOWN_LEFT_PRED_RV40_NODOWN] = "down_left_nodown_rv40",
57
+        [HOR_UP_PRED_RV40_NODOWN        ] = "horizontal_up_nodown_rv40",
58
+        [VERT_LEFT_PRED_RV40_NODOWN     ] = "vertical_left_nodown_rv40",
59
+    },
60
+    { /* SVQ3 */
61
+        [DIAG_DOWN_LEFT_PRED] = "down_left_svq3",
62
+    },
63
+};
64
+
65
+static const char * const pred8x8_modes[4][11] = {
66
+    { /* H264 */
67
+        [DC_PRED8x8              ] = "dc",
68
+        [HOR_PRED8x8             ] = "horizontal",
69
+        [VERT_PRED8x8            ] = "vertical",
70
+        [PLANE_PRED8x8           ] = "plane",
71
+        [LEFT_DC_PRED8x8         ] = "left_dc",
72
+        [TOP_DC_PRED8x8          ] = "top_dc",
73
+        [DC_128_PRED8x8          ] = "dc_128",
74
+        [ALZHEIMER_DC_L0T_PRED8x8] = "mad_cow_dc_l0t",
75
+        [ALZHEIMER_DC_0LT_PRED8x8] = "mad_cow_dc_0lt",
76
+        [ALZHEIMER_DC_L00_PRED8x8] = "mad_cow_dc_l00",
77
+        [ALZHEIMER_DC_0L0_PRED8x8] = "mad_cow_dc_0l0",
78
+    },
79
+    { /* VP8 */
80
+        [PLANE_PRED8x8 ] = "tm_vp8",
81
+        [DC_127_PRED8x8] = "dc_127_vp8",
82
+        [DC_129_PRED8x8] = "dc_129_vp8",
83
+    },
84
+    { /* RV40 */
85
+        [DC_PRED8x8     ] = "dc_rv40",
86
+        [LEFT_DC_PRED8x8] = "left_dc_rv40",
87
+        [TOP_DC_PRED8x8 ] = "top_dc_rv40",
88
+    },
89
+    { /* SVQ3 */
90
+    },
91
+};
92
+
93
+static const char * const pred16x16_modes[4][9] = {
94
+    { /* H264 */
95
+        [DC_PRED8x8     ] = "dc",
96
+        [HOR_PRED8x8    ] = "horizontal",
97
+        [VERT_PRED8x8   ] = "vertical",
98
+        [PLANE_PRED8x8  ] = "plane",
99
+        [LEFT_DC_PRED8x8] = "left_dc",
100
+        [TOP_DC_PRED8x8 ] = "top_dc",
101
+        [DC_128_PRED8x8 ] = "dc_128",
102
+    },
103
+    { /* VP8 */
104
+        [PLANE_PRED8x8 ] = "tm_vp8",
105
+        [DC_127_PRED8x8] = "dc_127_vp8",
106
+        [DC_129_PRED8x8] = "dc_129_vp8",
107
+    },
108
+    { /* RV40 */
109
+        [PLANE_PRED8x8] = "plane_rv40",
110
+    },
111
+    { /* SVQ3 */
112
+        [PLANE_PRED8x8] = "plane_svq3",
113
+    },
114
+};
115
+
116
+static const uint32_t pixel_mask[3] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff };
117
+
118
+#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
119
+#define BUF_SIZE (3*16*17)
120
+
121
+#define check_pred_func(func, name, mode_name)\
122
+    (mode_name && ((codec_ids[codec] == AV_CODEC_ID_H264) ?\
123
+    check_func(func, "pred%s_%s_%d", name, mode_name, bit_depth) :\
124
+    check_func(func, "pred%s_%s", name, mode_name)))
125
+
126
+#define randomize_buffers()\
127
+    do {\
128
+        uint32_t mask = pixel_mask[bit_depth-8];\
129
+        int i;\
130
+        for (i = 0; i < BUF_SIZE; i += 4) {\
131
+            uint32_t r = rnd() & mask;\
132
+            AV_WN32A(buf0+i, r);\
133
+            AV_WN32A(buf1+i, r);\
134
+        }\
135
+    } while (0)
136
+
137
+#define src0 (buf0 + 4*16) /* Offset to allow room for top and left */
138
+#define src1 (buf1 + 4*16)
139
+
140
+static void check_pred4x4(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
141
+                          int codec, int chroma_format, int bit_depth)
142
+{
143
+    if (chroma_format == 1) {
144
+        uint8_t *topright = buf0 + 2*16;
145
+        int pred_mode;
146
+        for (pred_mode = 0; pred_mode < 15; pred_mode++) {
147
+            if (check_pred_func(h->pred4x4[pred_mode], "4x4", pred4x4_modes[codec][pred_mode])) {
148
+                randomize_buffers();
149
+                call_ref(src0, topright, (ptrdiff_t)12*SIZEOF_PIXEL);
150
+                call_new(src1, topright, (ptrdiff_t)12*SIZEOF_PIXEL);
151
+                if (memcmp(buf0, buf1, BUF_SIZE))
152
+                    fail();
153
+                bench_new(src1, topright, (ptrdiff_t)12*SIZEOF_PIXEL);
154
+            }
155
+        }
156
+    }
157
+}
158
+
159
/* Test (and optionally benchmark) every named 8x8 prediction function of the
 * given codec. For chroma_format 2 the functions are registered under the
 * "8x16" name instead of "8x8". buf0 is passed to the reference and buf1 to
 * the new implementation; both are filled with the same random data first. */
static void check_pred8x8(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
                          int codec, int chroma_format, int bit_depth)
{
    int pred_mode;
    for (pred_mode = 0; pred_mode < 11; pred_mode++) {
        /* Modes without a name for this codec evaluate to false and are skipped */
        if (check_pred_func(h->pred8x8[pred_mode], (chroma_format == 2) ? "8x16" : "8x8",
                            pred8x8_modes[codec][pred_mode])) {
            randomize_buffers();
            call_ref(src0, (ptrdiff_t)24*SIZEOF_PIXEL);
            call_new(src1, (ptrdiff_t)24*SIZEOF_PIXEL);
            /* The whole buffers are compared, not just the predicted block */
            if (memcmp(buf0, buf1, BUF_SIZE))
                fail();
            bench_new(src1, (ptrdiff_t)24*SIZEOF_PIXEL);
        }
    }
}
175
+
176
+static void check_pred16x16(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
177
+                            int codec, int chroma_format, int bit_depth)
178
+{
179
+    if (chroma_format == 1) {
180
+        int pred_mode;
181
+        for (pred_mode = 0; pred_mode < 9; pred_mode++) {
182
+            if (check_pred_func(h->pred16x16[pred_mode], "16x16", pred16x16_modes[codec][pred_mode])) {
183
+                randomize_buffers();
184
+                call_ref(src0, (ptrdiff_t)48);
185
+                call_new(src1, (ptrdiff_t)48);
186
+                if (memcmp(buf0, buf1, BUF_SIZE))
187
+                    fail();
188
+                bench_new(src1, (ptrdiff_t)48);
189
+            }
190
+        }
191
+    }
192
+}
193
+
194
/* Test (and optionally benchmark) the 8x8l prediction functions. Only
 * executed for H.264 with chroma_format 1. Each function is run once per
 * combination of the has_topleft / has_topright arguments. Mode names are
 * taken from the pred4x4 table. */
static void check_pred8x8l(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
                           int codec, int chroma_format, int bit_depth)
{
    if (chroma_format == 1 && codec_ids[codec] == AV_CODEC_ID_H264) {
        int pred_mode;
        for (pred_mode = 0; pred_mode < 12; pred_mode++) {
            if (check_pred_func(h->pred8x8l[pred_mode], "8x8l", pred4x4_modes[codec][pred_mode])) {
                int neighbors;
                /* Steps through 0, 0x4000, 0x8000 and 0xc000: all four
                 * combinations of the two flag bits extracted below */
                for (neighbors = 0; neighbors <= 0xc000; neighbors += 0x4000) {
                    int has_topleft  = neighbors & 0x8000;
                    int has_topright = neighbors & 0x4000;

                    if ((pred_mode == DIAG_DOWN_RIGHT_PRED || pred_mode == VERT_RIGHT_PRED) && !has_topleft)
                        continue; /* Those aren't allowed according to the spec */

                    randomize_buffers();
                    call_ref(src0, has_topleft, has_topright, (ptrdiff_t)24*SIZEOF_PIXEL);
                    call_new(src1, has_topleft, has_topright, (ptrdiff_t)24*SIZEOF_PIXEL);
                    /* The whole buffers are compared, not just the predicted block */
                    if (memcmp(buf0, buf1, BUF_SIZE))
                        fail();
                    bench_new(src1, has_topleft, has_topright, (ptrdiff_t)24*SIZEOF_PIXEL);
                }
            }
        }
    }
}
220
+
221
+/* TODO: Add tests for H.264 lossless H/V prediction */
222
+
223
/* Entry point of the h264pred tests. For each test group (pred4x4, pred8x8,
 * pred16x16, pred8x8l) the prediction context is initialized for every
 * codec / bit depth / chroma format combination covered by the loops below
 * and the group's check function is run; one report line is emitted per
 * group. */
void checkasm_check_h264pred(void)
{
    static const struct {
        void (*func)(H264PredContext*, uint8_t*, uint8_t*, int, int, int);
        const char *name;
    } tests[] = {
        { check_pred4x4,   "pred4x4"   },
        { check_pred8x8,   "pred8x8"   },
        { check_pred16x16, "pred16x16" },
        { check_pred8x8l,  "pred8x8l"  },
    };

    /* buf0 is handed to the reference function, buf1 to the function under test */
    DECLARE_ALIGNED(16, uint8_t, buf0)[BUF_SIZE];
    DECLARE_ALIGNED(16, uint8_t, buf1)[BUF_SIZE];
    H264PredContext h;
    int test, codec, chroma_format, bit_depth;

    for (test = 0; test < FF_ARRAY_ELEMS(tests); test++) {
        for (codec = 0; codec < 4; codec++) {
            int codec_id = codec_ids[codec];
            /* H.264 is exercised at bit depths 8..10 and chroma formats 1..2;
             * the other codecs only at bit depth 8 with chroma format 1 */
            for (bit_depth = 8; bit_depth <= (codec_id == AV_CODEC_ID_H264 ? 10 : 8); bit_depth++)
                for (chroma_format = 1; chroma_format <= (codec_id == AV_CODEC_ID_H264 ? 2 : 1); chroma_format++) {
                    ff_h264_pred_init(&h, codec_id, bit_depth, chroma_format);
                    tests[test].func(&h, buf0, buf1, codec, chroma_format, bit_depth);
                }
        }
        report("%s", tests[test].name);
    }
}
0 252
new file mode 100644
... ...
@@ -0,0 +1,6 @@
0
+CHECKASMOBJS-$(HAVE_YASM) += x86/checkasm.o
1
+# x86/checkasm.o joins CHECKASMOBJS-yes only when HAVE_YASM expands to "yes". Recipe below: write the .d dependency file (-M), assemble the object, then strip it; the leading '-' lets the build continue if strip fails.
2
+tests/checkasm/x86/%.o: tests/checkasm/x86/%.asm
3
+	$(DEPYASM) $(YASMFLAGS) -I $(<D)/ -M -o $@ $< > $(@:.o=.d)
4
+	$(YASM) $(YASMFLAGS) -I $(<D)/ -o $@ $<
5
+	-$(STRIP) $(STRIPFLAGS) $@
0 6
new file mode 100644
... ...
@@ -0,0 +1,193 @@
0
+;*****************************************************************************
1
+;* Assembly testing and benchmarking tool
2
+;* Copyright (c) 2008 Loren Merritt
3
+;* Copyright (c) 2012 Henrik Gramner
4
+;*
5
+;* This file is part of FFmpeg.
6
+;*
7
+;* FFmpeg is free software; you can redistribute it and/or modify
8
+;* it under the terms of the GNU General Public License as published by
9
+;* the Free Software Foundation; either version 2 of the License, or
10
+;* (at your option) any later version.
11
+;*
12
+;* FFmpeg is distributed in the hope that it will be useful,
13
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
+;* GNU General Public License for more details.
16
+;*
17
+;* You should have received a copy of the GNU General Public License
18
+;* along with this program; if not, write to the Free Software
19
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20
+;*****************************************************************************
21
+
22
+%define private_prefix checkasm
23
+%include "libavutil/x86/x86inc.asm"
24
+; With private_prefix set to checkasm, the cglobal/cextern names in this file correspond to checkasm_<name> on the C side (see the prototypes in the banners further down).
25
+SECTION_RODATA
26
+
27
+error_message: db "failed to preserve register", 0 ; NUL-terminated text handed to fail_func by checked_call
28
+
29
+%if ARCH_X86_64
30
+; just random numbers to reduce the chance of incidental match: x6-x15 are the 128-bit patterns for m6-m15 (WIN64 only), n7-n14 the 64-bit patterns for r7-r14 (only r9-r14 when not WIN64)
31
+ALIGN 16
32
+x6:  dq 0x1a1b2550a612b48c,0x79445c159ce79064
33
+x7:  dq 0x2eed899d5a28ddcd,0x86b2536fcd8cf636
34
+x8:  dq 0xb0856806085e7943,0x3f2bf84fc0fcca4e
35
+x9:  dq 0xacbd382dcf5b8de2,0xd229e1f5b281303f
36
+x10: dq 0x71aeaff20b095fd9,0xab63e2e11fa38ed9
37
+x11: dq 0x89b0c0765892729a,0x77d410d5c42c882d
38
+x12: dq 0xc45ea11a955d8dd5,0x24b3c1d2a024048b
39
+x13: dq 0x2e8ec680de14b47c,0xdd7b8919edd42786
40
+x14: dq 0x135ce6888fa02cbf,0x11e53e2b2ac655ef
41
+x15: dq 0x011ff554472a7a10,0x6de8f4c914c334d5
42
+n7:  dq 0x21f86d66c8ca00ce
43
+n8:  dq 0x75b6ba21077c48ad
44
+n9:  dq 0xed56bb2dcb3c7736
45
+n10: dq 0x8bda43d3fd1a7e06
46
+n11: dq 0xb64a9c9e5d318408
47
+n12: dq 0xdf9a54b303f1d3a3
48
+n13: dq 0x4a75479abd64e097
49
+n14: dq 0x249214109d5d1c88
50
+%endif
51
+
52
+SECTION .text
53
+; fail_func is defined outside this file; checked_call calls it with error_message when a checked register changed.
54
+cextern fail_func
55
+
56
+; max number of args used by any asm function; checked_call forwards exactly this many argument slots.
57
+; (max_args % 4) must equal 3 for stack alignment
58
+%define max_args 15
59
+
60
+%if ARCH_X86_64
61
+
62
+;-----------------------------------------------------------------------------
63
+; int checkasm_stack_clobber(uint64_t clobber, ...) -- NOTE(review): nothing below sets a return value explicitly; confirm the return type against the C declaration
64
+;-----------------------------------------------------------------------------
65
+cglobal stack_clobber, 1,2
66
+    ; Clobber the stack with junk below the stack pointer: (max_args+6)*8 bytes are filled with the 64-bit value passed in r0
67
+    %define size (max_args+6)*8
68
+    SUB  rsp, size ; move rsp down so the area to overwrite lies at [rsp, rsp+size)
69
+    mov   r1, size-8 ; r1 = byte offset of the highest 8-byte slot
70
+.loop:
71
+    mov [rsp+r1], r0 ; store the clobber value into the current slot
72
+    sub   r1, 8 ; step down one slot; the flags from this sub drive the jge below
73
+    jge .loop ; keep going until the slot at offset 0 has been written
74
+    ADD  rsp, size ; restore rsp; the junk stays behind below it
75
+    RET
76
+
77
+; free_regs is the index of the first general-purpose register that
+; checked_call fills with a known pattern and verifies after the call:
+; r7..r14 on WIN64, r9..r14 otherwise. (Presumably these are the callee-saved
+; registers of each ABI under x86inc's rN numbering -- see x86inc.asm.)
+%if WIN64
78
+    %assign free_regs 7
79
+%else
80
+    %assign free_regs 9
81
+%endif
82
+
83
+;-----------------------------------------------------------------------------
84
+; intptr_t checkasm_checked_call(intptr_t (*func)(), ...)
85
+;-----------------------------------------------------------------------------
+; Calls func with the max_args argument slots that follow it and checks that
+; r<free_regs>..r14 (and m6..m15 on WIN64) still contain the patterns that were
+; loaded before the call. If any of them changed, fail_func is called with
+; error_message. The value func left in rax (and rdx) is returned unchanged
+; in both cases.
86
+INIT_XMM
87
+cglobal checked_call, 2,15,16,max_args*8+8
88
+    mov  r6, r0 ; keep the function pointer; r0 is reloaded with the first argument below
89
+
90
+    ; All arguments have been pushed on the stack instead of registers in order to
91
+    ; test for incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
+    ; r6mp..r11mp are x86inc's names for the memory locations of those arguments.
92
+    mov  r0, r6mp
93
+    mov  r1, r7mp
94
+    mov  r2, r8mp
95
+    mov  r3, r9mp
96
+%if UNIX64
97
+    mov  r4, r10mp
98
+    mov  r5, r11mp
+    ; Six arguments travel in registers here; copy arguments 6..max_args-1 from
+    ; the caller's frame to the bottom of our own stack area for func.
+    ; r9 is only a temporary; it receives its test pattern further down.
99
+    %assign i 6
100
+    %rep max_args-6
101
+        mov  r9, [rsp+stack_offset+(i+1)*8]
102
+        mov  [rsp+(i-6)*8], r9
103
+        %assign i i+1
104
+    %endrep
105
+%else
+    ; Four arguments travel in registers here; arguments 4..max_args-1 are
+    ; copied to [rsp+i*8], i.e. the first four stack slots are left untouched.
106
+    %assign i 4
107
+    %rep max_args-4
108
+        mov  r9, [rsp+stack_offset+(i+7)*8]
109
+        mov  [rsp+i*8], r9
110
+        %assign i i+1
111
+    %endrep
112
+%endif
113
+
114
+%if WIN64
+    ; Load m6..m15 with the x6..x15 patterns so that changes can be detected.
115
+    %assign i 6
116
+    %rep 16-6
117
+        mova m %+ i, [x %+ i]
118
+        %assign i i+1
119
+    %endrep
120
+%endif
121
+
+; Load r14 down to r<free_regs> with the matching n<i> patterns.
122
+%assign i 14
123
+%rep 15-free_regs
124
+    mov r %+ i, [n %+ i]
125
+    %assign i i-1
126
+%endrep
127
+    call r6
+; XOR every checked register with its pattern (zero if unchanged) and OR all
+; results into r14, so r14 ends up zero only if every register was preserved.
128
+%assign i 14
129
+%rep 15-free_regs
130
+    xor r %+ i, [n %+ i]
131
+    or  r14, r %+ i
132
+    %assign i i-1
133
+%endrep
134
+
135
+%if WIN64
+    ; Same scheme for m6..m15, accumulated in m6. packsswb squeezes the eight
+    ; words of m6 into its low 64 bits (a non-zero word stays non-zero after
+    ; saturation) so that a single movq can feed the result into r14.
136
+    %assign i 6
137
+    %rep 16-6
138
+        pxor m %+ i, [x %+ i]
139
+        por  m6, m %+ i
140
+        %assign i i+1
141
+    %endrep
142
+    packsswb m6, m6
143
+    movq r5, m6
144
+    or  r14, r5
145
+%endif
146
+
+    ; ZF comes from the last "or r14, ..." above: set means nothing changed.
147
+    jz .ok
+    ; Report the failure without disturbing func's return value. The upper
+    ; half of a two-register return value lives in rdx, so it is saved along
+    ; with rax. r9/r10 hold only leftovers of the check at this point; like the
+    ; original code did for r9, this relies on them surviving the call.
148
+    mov  r9, rax
+    mov r10, rdx
149
+    lea  r0, [error_message]
+    ; fail_func's prototype is not visible in this file. If it is variadic, the
+    ; System V AMD64 ABI expects al to bound the number of vector registers
+    ; used for arguments (none here); rax still holds func's return value, so
+    ; clear it. This is harmless for a non-variadic callee and on WIN64.
+    xor eax, eax
150
+    call fail_func
+    mov rdx, r10
151
+    mov rax, r9
152
+.ok:
153
+    RET
154
+
155
+%else
156
+
157
+; just random numbers to reduce the chance of incidental match
+; (n3..n6 are the immediates loaded into r3..r6 around the tested call)
158
+%define n3 dword 0x6549315c
159
+%define n4 dword 0xe02f3e23
160
+%define n5 dword 0xb78d0d1d
161
+%define n6 dword 0x33627ba7
162
+
163
+;-----------------------------------------------------------------------------
164
+; intptr_t checkasm_checked_call(intptr_t (*func)(), ...)
165
+;-----------------------------------------------------------------------------
+; Calls func with a copy of the max_args argument slots that follow it and
+; checks that r3..r6 still contain n3..n6 afterwards. If any of them changed,
+; fail_func is called with error_message. The value func left in eax (and
+; edx) is returned unchanged in both cases.
166
+cglobal checked_call, 1,7
167
+    mov  r3, n3
168
+    mov  r4, n4
169
+    mov  r5, n5
170
+    mov  r6, n6
+; Re-push the arguments for func. esp drops by 4 with every PUSH, so the same
+; address expression walks the caller's arguments from the last to the first.
171
+%rep max_args
172
+    PUSH dword [esp+20+max_args*4]
173
+%endrep
174
+    call r0
+    ; XOR each checked register with its pattern (zero if unchanged) and OR
+    ; the results together; ZF after the last OR means all were preserved.
175
+    xor  r3, n3
176
+    xor  r4, n4
177
+    xor  r5, n5
178
+    xor  r6, n6
179
+    or   r3, r4
180
+    or   r5, r6
181
+    or   r3, r5
182
+    jz .ok
+    ; Report the failure without disturbing func's return value. The upper
+    ; half of a 64-bit return value lives in edx, so it is saved along with
+    ; eax. r3/r4 hold only leftovers of the check at this point; like the
+    ; original code did for r3, this relies on them surviving the call.
183
+    mov  r3, eax
+    mov  r4, edx
184
+    lea  r0, [error_message]
185
+    mov [esp], r0 ; the message replaces the first re-pushed argument slot
186
+    call fail_func
+    mov  edx, r4
187
+    mov  eax, r3
188
+.ok:
189
+    add  esp, max_args*4 ; drop the argument copies pushed above
190
+    REP_RET
191
+
192
+%endif ; ARCH_X86_64