GitList

Browse code

Add macros for 64- and 128-bit write-combining optimization to intreadwrite.h.

Add x86 implementation using MMX/SSE.

Originally committed as revision 21281 to svn://svn.ffmpeg.org/ffmpeg/trunk

Alexander Strange authored on 2010/01/18 19:24:33
Showing 2 changed files

libavutil/intreadwrite.h index 78f8330..bdc3c43 100644
libavutil/x86/intreadwrite.h index 0000000..4621f06

@@ -25,8 +25,9 @@
                      /*
                       * Arch-specific headers can provide any combination of
                     - * AV_[RW][BLN](16|24|32|64) macros.  Preprocessor symbols must be
                     - * defined, even if these are implemented as inline functions.
                     + * AV_[RW][BLN](16|24|32|64) and AV_(COPY|SWAP|ZERO)(64|128) macros.
                     + * Preprocessor symbols must be defined, even if these are implemented
                     + * as inline functions.
                       */
                      #if   ARCH_ARM
@@ -37,6 +38,8 @@
                      #   include "mips/intreadwrite.h"
                      #elif ARCH_PPC
                      #   include "ppc/intreadwrite.h"
                     +#elif ARCH_X86
                     +#   include "x86/intreadwrite.h"
                      #endif
                      /*
@@ -397,4 +400,44 @@ struct unaligned_16 { uint16_t l; } __attribute__((packed));
                          } while(0)
                      #endif
                     +/* Parameters for AV_COPY*, AV_SWAP*, AV_ZERO* must be
                     + * naturally aligned. They may be implemented using MMX,
                     + * so emms_c() must be called before using any float code
                     + * afterwards.
                     + */
+                    +
                     +#define AV_COPY(n, d, s) (*(uint##n##_t*)(d) = *(const uint##n##_t*)(s)) /* generic n-bit copy: one uint<n>_t load from s, one uint<n>_t store to d */
+                    +
                     +#ifndef AV_COPY64 /* fallback, compiled only if AV_COPY64 is not already defined (e.g. by an arch header) */
                     +#   define AV_COPY64(d, s) AV_COPY(64, d, s)
                     +#endif
+                    +
                     +#ifndef AV_COPY128 /* fallback: a 128-bit copy expressed as two 64-bit copies */
                     +#   define AV_COPY128(d, s)                    \
                     +    do {                                       \
                     +        AV_COPY64(d, s);           /* bytes 0..7 */ \
                     +        AV_COPY64((char*)(d)+8, (char*)(s)+8); /* bytes 8..15 */ \
                     +    } while(0)
                     +#endif
+                    +
                     +#define AV_SWAP(n, a, b) FFSWAP(uint##n##_t, *(uint##n##_t*)(a), *(uint##n##_t*)(b)) /* generic n-bit swap of *a and *b; FFSWAP is defined outside this header view -- presumably a typed swap macro, confirm */
+                    +
                     +#ifndef AV_SWAP64 /* fallback, compiled only if AV_SWAP64 is not already defined (e.g. by an arch header) */
                     +#   define AV_SWAP64(a, b) AV_SWAP(64, a, b)
                     +#endif
+                    +
                     +#define AV_ZERO(n, d) (*(uint##n##_t*)(d) = 0) /* generic n-bit clear: stores 0 through d as a uint<n>_t */
+                    +
                     +#ifndef AV_ZERO64 /* fallback, compiled only if AV_ZERO64 is not already defined (e.g. by an arch header) */
                     +#   define AV_ZERO64(d) AV_ZERO(64, d)
                     +#endif
+                    +
                     +#ifndef AV_ZERO128 /* fallback: a 128-bit clear expressed as two 64-bit clears */
                     +#   define AV_ZERO128(d)         \
                     +    do {                         \
                     +        AV_ZERO64(d);   /* bytes 0..7 */ \
                     +        AV_ZERO64((char*)(d)+8); /* bytes 8..15 */ \
                     +    } while(0)
                     +#endif
+                    +
                      #endif /* AVUTIL_INTREADWRITE_H */

libavutil/x86/intreadwrite.h

History View file @ f6d0390

                     new file mode 100644
@@ -0,0 +1,96 @@
                     +/*
                     + * Copyright (c) 2010 Alexander Strange <astrange@ithinksw.com>
                     + *
                     + * This file is part of FFmpeg.
                     + *
                     + * FFmpeg is free software; you can redistribute it and/or
                     + * modify it under the terms of the GNU Lesser General Public
                     + * License as published by the Free Software Foundation; either
                     + * version 2.1 of the License, or (at your option) any later version.
                     + *
                     + * FFmpeg is distributed in the hope that it will be useful,
                     + * but WITHOUT ANY WARRANTY; without even the implied warranty of
                     + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     + * Lesser General Public License for more details.
                     + *
                     + * You should have received a copy of the GNU Lesser General Public
                     + * License along with FFmpeg; if not, write to the Free Software
                     + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                     + */
+                    +
                     +#ifndef AVUTIL_X86_INTREADWRITE_H
                     +#define AVUTIL_X86_INTREADWRITE_H
+                    +
                     +#include <stdint.h>
                     +#include "config.h"
+                    +
                     +#if HAVE_MMX
+                    +
                     +#if !HAVE_FAST_64BIT && defined(__MMX__)
+                    +
                     +#define AV_COPY64 AV_COPY64 /* self-define so the generic "#ifndef AV_COPY64" fallback is skipped */
                     +static av_always_inline void AV_COPY64(void *d, const void *s) /* copies the 8 bytes at s to d through MMX register mm0 */
                     +{
                     +    __asm__("movq   %1, %%mm0  \n\t" /* mm0 = 64-bit value at s */
                     +            "movq   %%mm0, %0  \n\t" /* 64-bit value at d = mm0 */
                     +            : "=m"(*(uint64_t*)d) /* %0: write-only memory operand, the uint64_t at d */
                     +            : "m" (*(const uint64_t*)s) /* %1: read-only memory operand, the uint64_t at s */
                     +            : "mm0"); /* mm0 is clobbered; no emms is executed here */
                     +}
+                    +
                     +#define AV_SWAP64 AV_SWAP64 /* self-define so the generic "#ifndef AV_SWAP64" fallback is skipped */
                     +static av_always_inline void AV_SWAP64(void *a, void *b) /* exchanges the 8 bytes at a with the 8 bytes at b using mm0 and mm1 */
                     +{
                     +    __asm__("movq   %1, %%mm0  \n\t" /* mm0 = old value at b */
                     +            "movq   %0, %%mm1  \n\t" /* mm1 = old value at a */
                     +            "movq   %%mm0, %0  \n\t" /* a receives old value of b */
                     +            "movq   %%mm1, %1  \n\t" /* b receives old value of a */
                     +            : "+m"(*(uint64_t*)a), "+m"(*(uint64_t*)b) /* %0 and %1 are both read-write memory operands */
                     +            ::"mm0", "mm1"); /* both MMX registers are clobbered; no emms is executed here */
                     +}
+                    +
                     +#define AV_ZERO64 AV_ZERO64 /* self-define so the generic "#ifndef AV_ZERO64" fallback is skipped */
                     +static av_always_inline void AV_ZERO64(void *d) /* clears the 8 bytes at d with a single MMX store */
                     +{
                     +    __asm__("pxor %%mm0, %%mm0  \n\t" /* mm0 ^= mm0, i.e. mm0 = 0 */
                     +            "movq %%mm0, %0     \n\t" /* store the zeroed register to d */
                     +            : "=m"(*(uint64_t*)d) /* %0: write-only memory operand, the uint64_t at d */
                     +            :: "mm0"); /* mm0 is clobbered; no emms is executed here */
                     +}
+                    +
                     +#endif /* !HAVE_FAST_64BIT && defined(__MMX__) */
+                    +
                     +#ifdef __SSE__
+                    +
                     +#define AV_COPY128 AV_COPY128 /* self-define so the generic "#ifndef AV_COPY128" fallback is skipped */
                     +static av_always_inline void AV_COPY128(void *d, const void *s) /* copies the 16 bytes at s to d through SSE register xmm0 */
                     +{
                     +    struct v {uint64_t v[2];}; /* 16-byte type so each "m" operand describes the whole 128-bit object */
+                    +
                     +    __asm__("movaps   %1, %%xmm0  \n\t" /* xmm0 = 128-bit value at s (movaps is the aligned-move form) */
                     +            "movaps   %%xmm0, %0  \n\t" /* 128-bit value at d = xmm0 */
                     +            : "=m"(*(struct v*)d) /* %0: write-only memory operand, 16 bytes at d */
                     +            : "m" (*(const struct v*)s) /* %1: read-only memory operand, 16 bytes at s */
                     +            : "xmm0"); /* xmm0 is clobbered */
                     +}
+                    +
                     +#endif /* __SSE__ */
+                    +
                     +#ifdef __SSE2__
+                    +
                     +#define AV_ZERO128 AV_ZERO128 /* self-define so the generic "#ifndef AV_ZERO128" fallback is skipped */
                     +static av_always_inline void AV_ZERO128(void *d) /* clears the 16 bytes at d with a single store from xmm0 */
                     +{
                     +    struct v {uint64_t v[2];}; /* 16-byte type so the "m" operand describes the whole 128-bit object */
+                    +
                     +    __asm__("pxor %%xmm0, %%xmm0  \n\t" /* xmm0 ^= xmm0, i.e. xmm0 = 0 */
                     +            "movdqa   %%xmm0, %0  \n\t" /* store the zeroed register to d (movdqa is the aligned-move form) */
                     +            : "=m"(*(struct v*)d) /* %0: write-only memory operand, 16 bytes at d */
                     +            :: "xmm0"); /* xmm0 is clobbered */
                     +}
+                    +
                     +#endif /* __SSE2__ */
+                    +
                     +#endif /* HAVE_MMX */
+                    +
                     +#endif /* AVUTIL_X86_INTREADWRITE_H */