Add generic AV_COPY/AV_SWAP/AV_ZERO (64- and 128-bit) macros to intreadwrite.h and an x86 implementation of them using MMX/SSE.
Originally committed as revision 21281 to svn://svn.ffmpeg.org/ffmpeg/trunk
| ... | ... |
@@ -25,8 +25,9 @@ |
| 25 | 25 |
|
| 26 | 26 |
/* |
| 27 | 27 |
* Arch-specific headers can provide any combination of |
| 28 |
- * AV_[RW][BLN](16|24|32|64) macros. Preprocessor symbols must be |
|
| 29 |
- * defined, even if these are implemented as inline functions. |
|
| 28 |
+ * AV_[RW][BLN](16|24|32|64) and AV_(COPY|SWAP|ZERO)(64|128) macros. |
|
| 29 |
+ * Preprocessor symbols must be defined, even if these are implemented |
|
| 30 |
+ * as inline functions. |
|
| 30 | 31 |
*/ |
| 31 | 32 |
|
| 32 | 33 |
#if ARCH_ARM |
| ... | ... |
@@ -37,6 +38,8 @@ |
| 37 | 37 |
# include "mips/intreadwrite.h" |
| 38 | 38 |
#elif ARCH_PPC |
| 39 | 39 |
# include "ppc/intreadwrite.h" |
| 40 |
+#elif ARCH_X86 |
|
| 41 |
+# include "x86/intreadwrite.h" |
|
| 40 | 42 |
#endif |
| 41 | 43 |
|
| 42 | 44 |
/* |
| ... | ... |
@@ -397,4 +400,44 @@ struct unaligned_16 { uint16_t l; } __attribute__((packed));
|
| 397 | 397 |
} while(0) |
| 398 | 398 |
#endif |
| 399 | 399 |
|
| 400 |
/* Parameters for AV_COPY*, AV_SWAP* and AV_ZERO* must be
 * naturally aligned. These macros may be implemented using MMX,
 * so emms_c() must be called after using them and before any
 * floating-point code runs.
 */
|
| 405 |
+ |
|
| 406 |
/*
 * Copy one n-bit integer from s to d with a single load and store
 * through uint<n>_t pointers. Both pointers must be naturally aligned
 * for an n-bit access.
 */
#define AV_COPY(n, d, s) (*(uint##n##_t*)(d) = *(const uint##n##_t*)(s))

/* 64-bit copy; skipped when an arch-specific header already defined it. */
#ifndef AV_COPY64
#   define AV_COPY64(d, s) AV_COPY(64, d, s)
#endif

/*
 * 128-bit copy; skipped when an arch-specific header already defined it.
 * The fallback performs two 64-bit copies: bytes 0..7 first, then
 * bytes 8..15. The source is only read, so its upper half is addressed
 * through a const-qualified pointer rather than casting const away.
 */
#ifndef AV_COPY128
#   define AV_COPY128(d, s)                              \
    do {                                                 \
        AV_COPY64(d, s);                                 \
        AV_COPY64((char*)(d)+8, (const char*)(s)+8);     \
    } while(0)
#endif
|
| 419 |
+ |
|
| 420 |
/*
 * Exchange the two n-bit integers that a and b point to.
 * The actual exchange is delegated to FFSWAP, which must be defined by
 * the time the macro is expanded.
 */
#define AV_SWAP(n, a, b) \
    FFSWAP(uint##n##_t, *(uint##n##_t*)(a), *(uint##n##_t*)(b))

/* 64-bit swap; skipped when an arch-specific header already defined it. */
#ifndef AV_SWAP64
#   define AV_SWAP64(a, b) AV_SWAP(64, a, b)
#endif
|
| 425 |
+ |
|
| 426 |
/*
 * Store zero into the n-bit integer that d points to, using a single
 * uint<n>_t store. d must be naturally aligned for an n-bit access.
 */
#define AV_ZERO(n, d) (*(uint##n##_t*)(d) = 0)

/* 64-bit clear; skipped when an arch-specific header already defined it. */
#ifndef AV_ZERO64
#   define AV_ZERO64(d) AV_ZERO(64, d)
#endif

/*
 * 128-bit clear; skipped when an arch-specific header already defined it.
 * The fallback clears bytes 0..7 and then bytes 8..15 with two 64-bit
 * stores.
 */
#ifndef AV_ZERO128
#   define AV_ZERO128(d)                \
    do {                                \
        AV_ZERO64(d);                   \
        AV_ZERO64((char*)(d)+8);        \
    } while(0)
#endif
|
| 439 |
+ |
|
| 400 | 440 |
#endif /* AVUTIL_INTREADWRITE_H */ |
| 401 | 441 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,96 @@ |
| 0 |
+/* |
|
| 1 |
+ * Copyright (c) 2010 Alexander Strange <astrange@ithinksw.com> |
|
| 2 |
+ * |
|
| 3 |
+ * This file is part of FFmpeg. |
|
| 4 |
+ * |
|
| 5 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
| 6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
| 7 |
+ * License as published by the Free Software Foundation; either |
|
| 8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
| 9 |
+ * |
|
| 10 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
| 11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 13 |
+ * Lesser General Public License for more details. |
|
| 14 |
+ * |
|
| 15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
| 16 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
| 17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
| 18 |
+ */ |
|
| 19 |
+ |
|
| 20 |
+#ifndef AVUTIL_X86_INTREADWRITE_H |
|
| 21 |
+#define AVUTIL_X86_INTREADWRITE_H |
|
| 22 |
+ |
|
| 23 |
+#include <stdint.h> |
|
| 24 |
+#include "config.h" |
|
| 25 |
+ |
|
| 26 |
+#if HAVE_MMX |
|
| 27 |
+ |
|
| 28 |
+#if !HAVE_FAST_64BIT && defined(__MMX__) |
|
| 29 |
+ |
|
| 30 |
+#define AV_COPY64 AV_COPY64 |
|
| 31 |
+static av_always_inline void AV_COPY64(void *d, const void *s) |
|
| 32 |
+{
|
|
| 33 |
+ __asm__("movq %1, %%mm0 \n\t"
|
|
| 34 |
+ "movq %%mm0, %0 \n\t" |
|
| 35 |
+ : "=m"(*(uint64_t*)d) |
|
| 36 |
+ : "m" (*(const uint64_t*)s) |
|
| 37 |
+ : "mm0"); |
|
| 38 |
+} |
|
| 39 |
+ |
|
| 40 |
+#define AV_SWAP64 AV_SWAP64 |
|
| 41 |
+static av_always_inline void AV_SWAP64(void *a, void *b) |
|
| 42 |
+{
|
|
| 43 |
+ __asm__("movq %1, %%mm0 \n\t"
|
|
| 44 |
+ "movq %0, %%mm1 \n\t" |
|
| 45 |
+ "movq %%mm0, %0 \n\t" |
|
| 46 |
+ "movq %%mm1, %1 \n\t" |
|
| 47 |
+ : "+m"(*(uint64_t*)a), "+m"(*(uint64_t*)b) |
|
| 48 |
+ ::"mm0", "mm1"); |
|
| 49 |
+} |
|
| 50 |
+ |
|
| 51 |
+#define AV_ZERO64 AV_ZERO64 |
|
| 52 |
+static av_always_inline void AV_ZERO64(void *d) |
|
| 53 |
+{
|
|
| 54 |
+ __asm__("pxor %%mm0, %%mm0 \n\t"
|
|
| 55 |
+ "movq %%mm0, %0 \n\t" |
|
| 56 |
+ : "=m"(*(uint64_t*)d) |
|
| 57 |
+ :: "mm0"); |
|
| 58 |
+} |
|
| 59 |
+ |
|
| 60 |
+#endif /* !HAVE_FAST_64BIT && defined(__MMX__) */ |
|
| 61 |
+ |
|
| 62 |
+#ifdef __SSE__ |
|
| 63 |
+ |
|
| 64 |
+#define AV_COPY128 AV_COPY128 |
|
| 65 |
+static av_always_inline void AV_COPY128(void *d, const void *s) |
|
| 66 |
+{
|
|
| 67 |
+ struct v {uint64_t v[2];};
|
|
| 68 |
+ |
|
| 69 |
+ __asm__("movaps %1, %%xmm0 \n\t"
|
|
| 70 |
+ "movaps %%xmm0, %0 \n\t" |
|
| 71 |
+ : "=m"(*(struct v*)d) |
|
| 72 |
+ : "m" (*(const struct v*)s) |
|
| 73 |
+ : "xmm0"); |
|
| 74 |
+} |
|
| 75 |
+ |
|
| 76 |
+#endif /* __SSE__ */ |
|
| 77 |
+ |
|
| 78 |
+#ifdef __SSE2__ |
|
| 79 |
+ |
|
| 80 |
+#define AV_ZERO128 AV_ZERO128 |
|
| 81 |
+static av_always_inline void AV_ZERO128(void *d) |
|
| 82 |
+{
|
|
| 83 |
+ struct v {uint64_t v[2];};
|
|
| 84 |
+ |
|
| 85 |
+ __asm__("pxor %%xmm0, %%xmm0 \n\t"
|
|
| 86 |
+ "movdqa %%xmm0, %0 \n\t" |
|
| 87 |
+ : "=m"(*(struct v*)d) |
|
| 88 |
+ :: "xmm0"); |
|
| 89 |
+} |
|
| 90 |
+ |
|
| 91 |
+#endif /* __SSE2__ */ |
|
| 92 |
+ |
|
| 93 |
+#endif /* HAVE_MMX */ |
|
| 94 |
+ |
|
| 95 |
+#endif /* AVUTIL_X86_INTREADWRITE_H */ |