Browse code

Add macros for 64- and 128-bit write-combining optimization to intreadwrite.h.

Add x86 implementation using MMX/SSE.

Originally committed as revision 21281 to svn://svn.ffmpeg.org/ffmpeg/trunk

Alexander Strange authored on 2010/01/18 19:24:33
Showing 2 changed files
... ...
@@ -25,8 +25,9 @@
25 25
 
26 26
 /*
27 27
  * Arch-specific headers can provide any combination of
28
- * AV_[RW][BLN](16|24|32|64) macros.  Preprocessor symbols must be
29
- * defined, even if these are implemented as inline functions.
28
+ * AV_[RW][BLN](16|24|32|64) and AV_(COPY|SWAP|ZERO)(64|128) macros.
29
+ * Preprocessor symbols must be defined, even if these are implemented
30
+ * as inline functions.
30 31
  */
31 32
 
32 33
 #if   ARCH_ARM
... ...
@@ -37,6 +38,8 @@
37 37
 #   include "mips/intreadwrite.h"
38 38
 #elif ARCH_PPC
39 39
 #   include "ppc/intreadwrite.h"
40
+#elif ARCH_X86
41
+#   include "x86/intreadwrite.h"
40 42
 #endif
41 43
 
42 44
 /*
... ...
@@ -397,4 +400,44 @@ struct unaligned_16 { uint16_t l; } __attribute__((packed));
397 397
     } while(0)
398 398
 #endif
399 399
 
400
+/* Pointers passed to AV_COPY*, AV_SWAP* and AV_ZERO* must be
401
+ * naturally aligned. These macros may be implemented using MMX,
402
+ * so emms_c() must be called after using them and before any
403
+ * floating-point code runs.
404
+ */
405
+
406
+#define AV_COPY(n, d, s) (*(uint##n##_t*)(d) = *(const uint##n##_t*)(s)) /* Copies one uint<n>_t: loads through s cast to const uint<n>_t*, stores through d cast to uint<n>_t*. */
407
+
408
+#ifndef AV_COPY64 /* Fallback, used only when AV_COPY64 has not already been defined as a preprocessor symbol. */
409
+#   define AV_COPY64(d, s) AV_COPY(64, d, s) /* 64-bit copy from s to d via the generic AV_COPY. */
410
+#endif
411
+
412
+#ifndef AV_COPY128 /* Fallback, used only when AV_COPY128 is not already defined: copies 128 bits as two AV_COPY64 steps, first at d/s, then at byte offset 8 from each. */
413
+#   define AV_COPY128(d, s)                    \
414
+    do {                                       \
415
+        AV_COPY64(d, s);                       \
416
+        AV_COPY64((char*)(d)+8, (char*)(s)+8); \
417
+    } while(0) /* do/while(0) makes the two copies act as one statement. NOTE(review): the (char*)(s) cast above drops any const qualifier on s. */
418
+#endif
419
+
420
+#define AV_SWAP(n, a, b) FFSWAP(uint##n##_t, *(uint##n##_t*)(a), *(uint##n##_t*)(b)) /* Passes FFSWAP the type uint<n>_t and the objects a and b point to, each accessed as uint<n>_t; FFSWAP is defined outside this excerpt (presumably it exchanges its two lvalue arguments). */
421
+
422
+#ifndef AV_SWAP64 /* Fallback, used only when AV_SWAP64 has not already been defined as a preprocessor symbol. */
423
+#   define AV_SWAP64(a, b) AV_SWAP(64, a, b) /* 64-bit swap of the objects at a and b via the generic AV_SWAP. */
424
+#endif
425
+
426
+#define AV_ZERO(n, d) (*(uint##n##_t*)(d) = 0) /* Stores 0 into the uint<n>_t object d points to (d is cast to uint<n>_t*). */
427
+
428
+#ifndef AV_ZERO64 /* Fallback, used only when AV_ZERO64 has not already been defined as a preprocessor symbol. */
429
+#   define AV_ZERO64(d) AV_ZERO(64, d) /* Zeroes 64 bits at d via the generic AV_ZERO. */
430
+#endif
431
+
432
+#ifndef AV_ZERO128 /* Fallback, used only when AV_ZERO128 is not already defined: zeroes 128 bits as two AV_ZERO64 steps, at d and at byte offset 8 from d. */
433
+#   define AV_ZERO128(d)         \
434
+    do {                         \
435
+        AV_ZERO64(d);            \
436
+        AV_ZERO64((char*)(d)+8); \
437
+    } while(0) /* do/while(0) makes the two stores act as one statement. */
438
+#endif
439
+
400 440
 #endif /* AVUTIL_INTREADWRITE_H */
401 441
new file mode 100644
... ...
@@ -0,0 +1,96 @@
0
+/*
1
+ * Copyright (c) 2010 Alexander Strange <astrange@ithinksw.com>
2
+ *
3
+ * This file is part of FFmpeg.
4
+ *
5
+ * FFmpeg is free software; you can redistribute it and/or
6
+ * modify it under the terms of the GNU Lesser General Public
7
+ * License as published by the Free Software Foundation; either
8
+ * version 2.1 of the License, or (at your option) any later version.
9
+ *
10
+ * FFmpeg is distributed in the hope that it will be useful,
11
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
+ * Lesser General Public License for more details.
14
+ *
15
+ * You should have received a copy of the GNU Lesser General Public
16
+ * License along with FFmpeg; if not, write to the Free Software
17
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ */
19
+
20
+#ifndef AVUTIL_X86_INTREADWRITE_H
21
+#define AVUTIL_X86_INTREADWRITE_H
22
+
23
+#include <stdint.h>
24
+#include "config.h"
25
+
26
+#if HAVE_MMX
27
+
28
+#if !HAVE_FAST_64BIT && defined(__MMX__)
29
+
30
+#define AV_COPY64 AV_COPY64 /* Defines the preprocessor symbol (expanding to itself) so that #ifndef AV_COPY64 tests see it as already provided. */
31
+static av_always_inline void AV_COPY64(void *d, const void *s) /* Copies 8 bytes from *s to *d through MMX register mm0. */
32
+{
33
+    __asm__("movq   %1, %%mm0  \n\t" /* mm0 = 8 bytes read from *s */
34
+            "movq   %%mm0, %0  \n\t" /* 8 bytes at *d = mm0 */
35
+            : "=m"(*(uint64_t*)d) /* %0: destination, write-only memory operand */
36
+            : "m" (*(const uint64_t*)s) /* %1: source, memory input operand */
37
+            : "mm0"); /* mm0 is declared clobbered; no emms is issued here */
38
+}
39
+
40
+#define AV_SWAP64 AV_SWAP64 /* Defines the preprocessor symbol (expanding to itself) so that #ifndef AV_SWAP64 tests see it as already provided. */
41
+static av_always_inline void AV_SWAP64(void *a, void *b) /* Exchanges the 8 bytes at *a with the 8 bytes at *b using MMX registers mm0 and mm1. */
42
+{
43
+    __asm__("movq   %1, %%mm0  \n\t" /* mm0 = old contents of *b */
44
+            "movq   %0, %%mm1  \n\t" /* mm1 = old contents of *a */
45
+            "movq   %%mm0, %0  \n\t" /* *a = old *b */
46
+            "movq   %%mm1, %1  \n\t" /* *b = old *a */
47
+            : "+m"(*(uint64_t*)a), "+m"(*(uint64_t*)b) /* %0 and %1: both memory operands are read and written */
48
+            ::"mm0", "mm1"); /* no input-only operands; mm0 and mm1 are declared clobbered, no emms is issued here */
49
+}
50
+
51
+#define AV_ZERO64 AV_ZERO64 /* Defines the preprocessor symbol (expanding to itself) so that #ifndef AV_ZERO64 tests see it as already provided. */
52
+static av_always_inline void AV_ZERO64(void *d) /* Writes 8 zero bytes to *d through MMX register mm0. */
53
+{
54
+    __asm__("pxor %%mm0, %%mm0  \n\t" /* mm0 ^= mm0, i.e. mm0 = 0 */
55
+            "movq %%mm0, %0     \n\t" /* 8 bytes at *d = mm0 */
56
+            : "=m"(*(uint64_t*)d) /* %0: destination, write-only memory operand */
57
+            :: "mm0"); /* no inputs; mm0 is declared clobbered, no emms is issued here */
58
+}
59
+
60
+#endif /* !HAVE_FAST_64BIT && defined(__MMX__) */
61
+
62
+#ifdef __SSE__
63
+
64
+#define AV_COPY128 AV_COPY128 /* Defines the preprocessor symbol (expanding to itself) so that #ifndef AV_COPY128 tests see it as already provided. */
65
+static av_always_inline void AV_COPY128(void *d, const void *s) /* Copies 16 bytes from *s to *d through SSE register xmm0. */
66
+{
67
+    struct v {uint64_t v[2];}; /* two uint64_t, so each memory operand below covers the full 128 bits */
68
+
69
+    __asm__("movaps   %1, %%xmm0  \n\t" /* xmm0 = 16 bytes read from *s (movaps is the aligned-move form) */
70
+            "movaps   %%xmm0, %0  \n\t" /* 16 bytes at *d = xmm0 */
71
+            : "=m"(*(struct v*)d) /* %0: destination, write-only memory operand */
72
+            : "m" (*(const struct v*)s) /* %1: source, memory input operand */
73
+            : "xmm0"); /* xmm0 is declared clobbered */
74
+}
75
+
76
+#endif /* __SSE__ */
77
+
78
+#ifdef __SSE2__
79
+
80
+#define AV_ZERO128 AV_ZERO128 /* Defines the preprocessor symbol (expanding to itself) so that #ifndef AV_ZERO128 tests see it as already provided. */
81
+static av_always_inline void AV_ZERO128(void *d) /* Writes 16 zero bytes to *d through SSE register xmm0. */
82
+{
83
+    struct v {uint64_t v[2];}; /* two uint64_t, so the memory operand below covers the full 128 bits */
84
+
85
+    __asm__("pxor %%xmm0, %%xmm0  \n\t" /* xmm0 ^= xmm0, i.e. xmm0 = 0 */
86
+            "movdqa   %%xmm0, %0  \n\t" /* 16 bytes at *d = xmm0 (movdqa is the aligned-move form) */
87
+            : "=m"(*(struct v*)d) /* %0: destination, write-only memory operand */
88
+            :: "xmm0"); /* no inputs; xmm0 is declared clobbered */
89
+}
90
+
91
+#endif /* __SSE2__ */
92
+
93
+#endif /* HAVE_MMX */
94
+
95
+#endif /* AVUTIL_X86_INTREADWRITE_H */