* qatar/master:
float_dsp: ppc: add a separate header for Altivec function prototypes
ARM: fix float_dsp breakage from d5a7229
Add a float DSP framework to libavutil
PPC: Move types_altivec.h and util_altivec.h from libavcodec to libavutil
ARM: Move asm.S from libavcodec to libavutil
vc1dsp: mark put/avg_vc1_mspel_mc() always_inline
Merged-by: Michael Niedermayer <michaelni@gmx.at>
| ... | ... |
@@ -30,6 +30,7 @@ |
| 30 | 30 |
#ifndef AVCODEC_AAC_H |
| 31 | 31 |
#define AVCODEC_AAC_H |
| 32 | 32 |
|
| 33 |
+#include "libavutil/float_dsp.h" |
|
| 33 | 34 |
#include "avcodec.h" |
| 34 | 35 |
#include "dsputil.h" |
| 35 | 36 |
#include "fft.h" |
| ... | ... |
@@ -292,6 +293,7 @@ typedef struct {
|
| 292 | 292 |
FFTContext mdct_ltp; |
| 293 | 293 |
DSPContext dsp; |
| 294 | 294 |
FmtConvertContext fmt_conv; |
| 295 |
+ AVFloatDSPContext fdsp; |
|
| 295 | 296 |
int random_state; |
| 296 | 297 |
/** @} */ |
| 297 | 298 |
|
| ... | ... |
@@ -79,7 +79,7 @@ |
| 79 | 79 |
Parametric Stereo. |
| 80 | 80 |
*/ |
| 81 | 81 |
|
| 82 |
- |
|
| 82 |
+#include "libavutil/float_dsp.h" |
|
| 83 | 83 |
#include "avcodec.h" |
| 84 | 84 |
#include "internal.h" |
| 85 | 85 |
#include "get_bits.h" |
| ... | ... |
@@ -901,6 +901,7 @@ static av_cold int aac_decode_init(AVCodecContext *avctx) |
| 901 | 901 |
|
| 902 | 902 |
ff_dsputil_init(&ac->dsp, avctx); |
| 903 | 903 |
ff_fmt_convert_init(&ac->fmt_conv, avctx); |
| 904 |
+ avpriv_float_dsp_init(&ac->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); |
|
| 904 | 905 |
|
| 905 | 906 |
ac->random_state = 0x1f2e3d4c; |
| 906 | 907 |
|
| ... | ... |
@@ -2069,10 +2070,10 @@ static void windowing_and_mdct_ltp(AACContext *ac, float *out, |
| 2069 | 2069 |
const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128; |
| 2070 | 2070 |
|
| 2071 | 2071 |
if (ics->window_sequence[0] != LONG_STOP_SEQUENCE) {
|
| 2072 |
- ac->dsp.vector_fmul(in, in, lwindow_prev, 1024); |
|
| 2072 |
+ ac->fdsp.vector_fmul(in, in, lwindow_prev, 1024); |
|
| 2073 | 2073 |
} else {
|
| 2074 | 2074 |
memset(in, 0, 448 * sizeof(float)); |
| 2075 |
- ac->dsp.vector_fmul(in + 448, in + 448, swindow_prev, 128); |
|
| 2075 |
+ ac->fdsp.vector_fmul(in + 448, in + 448, swindow_prev, 128); |
|
| 2076 | 2076 |
} |
| 2077 | 2077 |
if (ics->window_sequence[0] != LONG_START_SEQUENCE) {
|
| 2078 | 2078 |
ac->dsp.vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024); |
| ... | ... |
@@ -30,6 +30,7 @@ |
| 30 | 30 |
* add temporal noise shaping |
| 31 | 31 |
***********************************/ |
| 32 | 32 |
|
| 33 |
+#include "libavutil/float_dsp.h" |
|
| 33 | 34 |
#include "libavutil/opt.h" |
| 34 | 35 |
#include "avcodec.h" |
| 35 | 36 |
#include "put_bits.h" |
| ... | ... |
@@ -182,7 +183,9 @@ static void put_audio_specific_config(AVCodecContext *avctx) |
| 182 | 182 |
} |
| 183 | 183 |
|
| 184 | 184 |
#define WINDOW_FUNC(type) \ |
| 185 |
-static void apply_ ##type ##_window(DSPContext *dsp, SingleChannelElement *sce, const float *audio) |
|
| 185 |
+static void apply_ ##type ##_window(DSPContext *dsp, AVFloatDSPContext *fdsp, \ |
|
| 186 |
+ SingleChannelElement *sce, \ |
|
| 187 |
+ const float *audio) |
|
| 186 | 188 |
|
| 187 | 189 |
WINDOW_FUNC(only_long) |
| 188 | 190 |
{
|
| ... | ... |
@@ -190,7 +193,7 @@ WINDOW_FUNC(only_long) |
| 190 | 190 |
const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024; |
| 191 | 191 |
float *out = sce->ret; |
| 192 | 192 |
|
| 193 |
- dsp->vector_fmul (out, audio, lwindow, 1024); |
|
| 193 |
+ fdsp->vector_fmul (out, audio, lwindow, 1024); |
|
| 194 | 194 |
dsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024); |
| 195 | 195 |
} |
| 196 | 196 |
|
| ... | ... |
@@ -200,7 +203,7 @@ WINDOW_FUNC(long_start) |
| 200 | 200 |
const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128; |
| 201 | 201 |
float *out = sce->ret; |
| 202 | 202 |
|
| 203 |
- dsp->vector_fmul(out, audio, lwindow, 1024); |
|
| 203 |
+ fdsp->vector_fmul(out, audio, lwindow, 1024); |
|
| 204 | 204 |
memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448); |
| 205 | 205 |
dsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128); |
| 206 | 206 |
memset(out + 1024 + 576, 0, sizeof(out[0]) * 448); |
| ... | ... |
@@ -213,7 +216,7 @@ WINDOW_FUNC(long_stop) |
| 213 | 213 |
float *out = sce->ret; |
| 214 | 214 |
|
| 215 | 215 |
memset(out, 0, sizeof(out[0]) * 448); |
| 216 |
- dsp->vector_fmul(out + 448, audio + 448, swindow, 128); |
|
| 216 |
+ fdsp->vector_fmul(out + 448, audio + 448, swindow, 128); |
|
| 217 | 217 |
memcpy(out + 576, audio + 576, sizeof(out[0]) * 448); |
| 218 | 218 |
dsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024); |
| 219 | 219 |
} |
| ... | ... |
@@ -227,7 +230,7 @@ WINDOW_FUNC(eight_short) |
| 227 | 227 |
int w; |
| 228 | 228 |
|
| 229 | 229 |
for (w = 0; w < 8; w++) {
|
| 230 |
- dsp->vector_fmul (out, in, w ? pwindow : swindow, 128); |
|
| 230 |
+ fdsp->vector_fmul (out, in, w ? pwindow : swindow, 128); |
|
| 231 | 231 |
out += 128; |
| 232 | 232 |
in += 128; |
| 233 | 233 |
dsp->vector_fmul_reverse(out, in, swindow, 128); |
| ... | ... |
@@ -235,7 +238,9 @@ WINDOW_FUNC(eight_short) |
| 235 | 235 |
} |
| 236 | 236 |
} |
| 237 | 237 |
|
| 238 |
-static void (*const apply_window[4])(DSPContext *dsp, SingleChannelElement *sce, const float *audio) = {
|
|
| 238 |
+static void (*const apply_window[4])(DSPContext *dsp, AVFloatDSPContext *fdsp, |
|
| 239 |
+ SingleChannelElement *sce, |
|
| 240 |
+ const float *audio) = {
|
|
| 239 | 241 |
[ONLY_LONG_SEQUENCE] = apply_only_long_window, |
| 240 | 242 |
[LONG_START_SEQUENCE] = apply_long_start_window, |
| 241 | 243 |
[EIGHT_SHORT_SEQUENCE] = apply_eight_short_window, |
| ... | ... |
@@ -248,7 +253,7 @@ static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce, |
| 248 | 248 |
int i; |
| 249 | 249 |
float *output = sce->ret; |
| 250 | 250 |
|
| 251 |
- apply_window[sce->ics.window_sequence[0]](&s->dsp, sce, audio); |
|
| 251 |
+ apply_window[sce->ics.window_sequence[0]](&s->dsp, &s->fdsp, sce, audio); |
|
| 252 | 252 |
|
| 253 | 253 |
if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) |
| 254 | 254 |
s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output); |
| ... | ... |
@@ -693,6 +698,7 @@ static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s) |
| 693 | 693 |
int ret = 0; |
| 694 | 694 |
|
| 695 | 695 |
ff_dsputil_init(&s->dsp, avctx); |
| 696 |
+ avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); |
|
| 696 | 697 |
|
| 697 | 698 |
// window init |
| 698 | 699 |
ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024); |
| ... | ... |
@@ -22,6 +22,7 @@ |
| 22 | 22 |
#ifndef AVCODEC_AACENC_H |
| 23 | 23 |
#define AVCODEC_AACENC_H |
| 24 | 24 |
|
| 25 |
+#include "libavutil/float_dsp.h" |
|
| 25 | 26 |
#include "avcodec.h" |
| 26 | 27 |
#include "put_bits.h" |
| 27 | 28 |
#include "dsputil.h" |
| ... | ... |
@@ -61,6 +62,7 @@ typedef struct AACEncContext {
|
| 61 | 61 |
FFTContext mdct1024; ///< long (1024 samples) frame transform context |
| 62 | 62 |
FFTContext mdct128; ///< short (128 samples) frame transform context |
| 63 | 63 |
DSPContext dsp; |
| 64 |
+ AVFloatDSPContext fdsp; |
|
| 64 | 65 |
float *planar_samples[6]; ///< saved preprocessed input |
| 65 | 66 |
|
| 66 | 67 |
int samplerate_index; ///< MPEG-4 samplerate index |
| ... | ... |
@@ -2491,6 +2491,7 @@ av_cold int ff_ac3_encode_init(AVCodecContext *avctx) |
| 2491 | 2491 |
#endif |
| 2492 | 2492 |
|
| 2493 | 2493 |
ff_dsputil_init(&s->dsp, avctx); |
| 2494 |
+ avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); |
|
| 2494 | 2495 |
ff_ac3dsp_init(&s->ac3dsp, avctx->flags & CODEC_FLAG_BITEXACT); |
| 2495 | 2496 |
|
| 2496 | 2497 |
dprint_options(s); |
| ... | ... |
@@ -29,6 +29,8 @@ |
| 29 | 29 |
#define AVCODEC_AC3ENC_H |
| 30 | 30 |
|
| 31 | 31 |
#include <stdint.h> |
| 32 |
+ |
|
| 33 |
+#include "libavutil/float_dsp.h" |
|
| 32 | 34 |
#include "ac3.h" |
| 33 | 35 |
#include "ac3dsp.h" |
| 34 | 36 |
#include "avcodec.h" |
| ... | ... |
@@ -158,6 +160,7 @@ typedef struct AC3EncodeContext {
|
| 158 | 158 |
AVCodecContext *avctx; ///< parent AVCodecContext |
| 159 | 159 |
PutBitContext pb; ///< bitstream writer context |
| 160 | 160 |
DSPContext dsp; |
| 161 |
+ AVFloatDSPContext fdsp; |
|
| 161 | 162 |
AC3DSPContext ac3dsp; ///< AC-3 optimized functions |
| 162 | 163 |
FFTContext mdct; ///< FFT context for MDCT calculation |
| 163 | 164 |
const SampleType *mdct_window; ///< MDCT window function array |
| ... | ... |
@@ -68,10 +68,11 @@ av_cold int AC3_NAME(mdct_init)(AC3EncodeContext *s) |
| 68 | 68 |
/* |
| 69 | 69 |
* Apply KBD window to input samples prior to MDCT. |
| 70 | 70 |
*/ |
| 71 |
-static void apply_window(DSPContext *dsp, int16_t *output, const int16_t *input, |
|
| 71 |
+static void apply_window(void *dsp, int16_t *output, const int16_t *input, |
|
| 72 | 72 |
const int16_t *window, unsigned int len) |
| 73 | 73 |
{
|
| 74 |
- dsp->apply_window_int16(output, input, window, len); |
|
| 74 |
+ DSPContext *dsp0 = dsp; |
|
| 75 |
+ dsp0->apply_window_int16(output, input, window, len); |
|
| 75 | 76 |
} |
| 76 | 77 |
|
| 77 | 78 |
|
| ... | ... |
@@ -86,10 +86,12 @@ av_cold int ff_ac3_float_mdct_init(AC3EncodeContext *s) |
| 86 | 86 |
/* |
| 87 | 87 |
* Apply KBD window to input samples prior to MDCT. |
| 88 | 88 |
*/ |
| 89 |
-static void apply_window(DSPContext *dsp, float *output, const float *input, |
|
| 90 |
- const float *window, unsigned int len) |
|
| 89 |
+static void apply_window(void *dsp, float *output, |
|
| 90 |
+ const float *input, const float *window, |
|
| 91 |
+ unsigned int len) |
|
| 91 | 92 |
{
|
| 92 |
- dsp->vector_fmul(output, input, window, len); |
|
| 93 |
+ AVFloatDSPContext *fdsp = dsp; |
|
| 94 |
+ fdsp->vector_fmul(output, input, window, len); |
|
| 93 | 95 |
} |
| 94 | 96 |
|
| 95 | 97 |
|
| ... | ... |
@@ -33,7 +33,7 @@ |
| 33 | 33 |
|
| 34 | 34 |
static void scale_coefficients(AC3EncodeContext *s); |
| 35 | 35 |
|
| 36 |
-static void apply_window(DSPContext *dsp, SampleType *output, |
|
| 36 |
+static void apply_window(void *dsp, SampleType *output, |
|
| 37 | 37 |
const SampleType *input, const SampleType *window, |
| 38 | 38 |
unsigned int len); |
| 39 | 39 |
|
| ... | ... |
@@ -110,8 +110,13 @@ static void apply_mdct(AC3EncodeContext *s) |
| 110 | 110 |
AC3Block *block = &s->blocks[blk]; |
| 111 | 111 |
const SampleType *input_samples = &s->planar_samples[ch][blk * AC3_BLOCK_SIZE]; |
| 112 | 112 |
|
| 113 |
+#if CONFIG_AC3ENC_FLOAT |
|
| 114 |
+ apply_window(&s->fdsp, s->windowed_samples, input_samples, |
|
| 115 |
+ s->mdct_window, AC3_WINDOW_SIZE); |
|
| 116 |
+#else |
|
| 113 | 117 |
apply_window(&s->dsp, s->windowed_samples, input_samples, |
| 114 | 118 |
s->mdct_window, AC3_WINDOW_SIZE); |
| 119 |
+#endif |
|
| 115 | 120 |
|
| 116 | 121 |
if (s->fixed_point) |
| 117 | 122 |
block->coeff_shift[ch+1] = normalize_samples(s); |
| 25 | 25 |
deleted file mode 100644 |
| ... | ... |
@@ -1,231 +0,0 @@ |
| 1 |
-/* |
|
| 2 |
- * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> |
|
| 3 |
- * |
|
| 4 |
- * This file is part of FFmpeg. |
|
| 5 |
- * |
|
| 6 |
- * FFmpeg is free software; you can redistribute it and/or |
|
| 7 |
- * modify it under the terms of the GNU Lesser General Public |
|
| 8 |
- * License as published by the Free Software Foundation; either |
|
| 9 |
- * version 2.1 of the License, or (at your option) any later version. |
|
| 10 |
- * |
|
| 11 |
- * FFmpeg is distributed in the hope that it will be useful, |
|
| 12 |
- * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 13 |
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 14 |
- * Lesser General Public License for more details. |
|
| 15 |
- * |
|
| 16 |
- * You should have received a copy of the GNU Lesser General Public |
|
| 17 |
- * License along with FFmpeg; if not, write to the Free Software |
|
| 18 |
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
| 19 |
- */ |
|
| 20 |
- |
|
| 21 |
-#include "config.h" |
|
| 22 |
- |
|
| 23 |
-#ifdef __ELF__ |
|
| 24 |
-# define ELF |
|
| 25 |
-#else |
|
| 26 |
-# define ELF @ |
|
| 27 |
-#endif |
|
| 28 |
- |
|
| 29 |
-#if CONFIG_THUMB |
|
| 30 |
-# define A @ |
|
| 31 |
-# define T |
|
| 32 |
-#else |
|
| 33 |
-# define A |
|
| 34 |
-# define T @ |
|
| 35 |
-#endif |
|
| 36 |
- |
|
| 37 |
-#if HAVE_NEON |
|
| 38 |
- .arch armv7-a |
|
| 39 |
-#elif HAVE_ARMV6T2 |
|
| 40 |
- .arch armv6t2 |
|
| 41 |
-#elif HAVE_ARMV6 |
|
| 42 |
- .arch armv6 |
|
| 43 |
-#elif HAVE_ARMV5TE |
|
| 44 |
- .arch armv5te |
|
| 45 |
-#endif |
|
| 46 |
- |
|
| 47 |
-#if HAVE_NEON |
|
| 48 |
- .fpu neon |
|
| 49 |
-#elif HAVE_ARMVFP |
|
| 50 |
- .fpu vfp |
|
| 51 |
-#endif |
|
| 52 |
- |
|
| 53 |
- .syntax unified |
|
| 54 |
-T .thumb |
|
| 55 |
- |
|
| 56 |
-.macro require8 val=1 |
|
| 57 |
-ELF .eabi_attribute 24, \val |
|
| 58 |
-.endm |
|
| 59 |
- |
|
| 60 |
-.macro preserve8 val=1 |
|
| 61 |
-ELF .eabi_attribute 25, \val |
|
| 62 |
-.endm |
|
| 63 |
- |
|
| 64 |
-.macro function name, export=0 |
|
| 65 |
- .macro endfunc |
|
| 66 |
-ELF .size \name, . - \name |
|
| 67 |
- .endfunc |
|
| 68 |
- .purgem endfunc |
|
| 69 |
- .endm |
|
| 70 |
- .text |
|
| 71 |
- .align 2 |
|
| 72 |
- .if \export |
|
| 73 |
- .global EXTERN_ASM\name |
|
| 74 |
-EXTERN_ASM\name: |
|
| 75 |
- .endif |
|
| 76 |
-ELF .type \name, %function |
|
| 77 |
- .func \name |
|
| 78 |
-\name: |
|
| 79 |
-.endm |
|
| 80 |
- |
|
| 81 |
-.macro const name, align=2 |
|
| 82 |
- .macro endconst |
|
| 83 |
-ELF .size \name, . - \name |
|
| 84 |
- .purgem endconst |
|
| 85 |
- .endm |
|
| 86 |
- .section .rodata |
|
| 87 |
- .align \align |
|
| 88 |
-\name: |
|
| 89 |
-.endm |
|
| 90 |
- |
|
| 91 |
-#if !HAVE_ARMV6T2 |
|
| 92 |
-.macro movw rd, val |
|
| 93 |
- mov \rd, \val & 255 |
|
| 94 |
- orr \rd, \val & ~255 |
|
| 95 |
-.endm |
|
| 96 |
-#endif |
|
| 97 |
- |
|
| 98 |
-.macro mov32 rd, val |
|
| 99 |
-#if HAVE_ARMV6T2 |
|
| 100 |
- movw \rd, #(\val) & 0xffff |
|
| 101 |
- .if (\val) >> 16 |
|
| 102 |
- movt \rd, #(\val) >> 16 |
|
| 103 |
- .endif |
|
| 104 |
-#else |
|
| 105 |
- ldr \rd, =\val |
|
| 106 |
-#endif |
|
| 107 |
-.endm |
|
| 108 |
- |
|
| 109 |
-.macro movrel rd, val |
|
| 110 |
-#if HAVE_ARMV6T2 && !CONFIG_PIC && !defined(__APPLE__) |
|
| 111 |
- movw \rd, #:lower16:\val |
|
| 112 |
- movt \rd, #:upper16:\val |
|
| 113 |
-#else |
|
| 114 |
- ldr \rd, =\val |
|
| 115 |
-#endif |
|
| 116 |
-.endm |
|
| 117 |
- |
|
| 118 |
-.macro ldr_pre rt, rn, rm:vararg |
|
| 119 |
-A ldr \rt, [\rn, \rm]! |
|
| 120 |
-T add \rn, \rn, \rm |
|
| 121 |
-T ldr \rt, [\rn] |
|
| 122 |
-.endm |
|
| 123 |
- |
|
| 124 |
-.macro ldr_dpre rt, rn, rm:vararg |
|
| 125 |
-A ldr \rt, [\rn, -\rm]! |
|
| 126 |
-T sub \rn, \rn, \rm |
|
| 127 |
-T ldr \rt, [\rn] |
|
| 128 |
-.endm |
|
| 129 |
- |
|
| 130 |
-.macro ldr_nreg rt, rn, rm:vararg |
|
| 131 |
-A ldr \rt, [\rn, -\rm] |
|
| 132 |
-T sub \rt, \rn, \rm |
|
| 133 |
-T ldr \rt, [\rt] |
|
| 134 |
-.endm |
|
| 135 |
- |
|
| 136 |
-.macro ldr_post rt, rn, rm:vararg |
|
| 137 |
-A ldr \rt, [\rn], \rm |
|
| 138 |
-T ldr \rt, [\rn] |
|
| 139 |
-T add \rn, \rn, \rm |
|
| 140 |
-.endm |
|
| 141 |
- |
|
| 142 |
-.macro ldrd_reg rt, rt2, rn, rm |
|
| 143 |
-A ldrd \rt, \rt2, [\rn, \rm] |
|
| 144 |
-T add \rt, \rn, \rm |
|
| 145 |
-T ldrd \rt, \rt2, [\rt] |
|
| 146 |
-.endm |
|
| 147 |
- |
|
| 148 |
-.macro ldrd_post rt, rt2, rn, rm |
|
| 149 |
-A ldrd \rt, \rt2, [\rn], \rm |
|
| 150 |
-T ldrd \rt, \rt2, [\rn] |
|
| 151 |
-T add \rn, \rn, \rm |
|
| 152 |
-.endm |
|
| 153 |
- |
|
| 154 |
-.macro ldrh_pre rt, rn, rm |
|
| 155 |
-A ldrh \rt, [\rn, \rm]! |
|
| 156 |
-T add \rn, \rn, \rm |
|
| 157 |
-T ldrh \rt, [\rn] |
|
| 158 |
-.endm |
|
| 159 |
- |
|
| 160 |
-.macro ldrh_dpre rt, rn, rm |
|
| 161 |
-A ldrh \rt, [\rn, -\rm]! |
|
| 162 |
-T sub \rn, \rn, \rm |
|
| 163 |
-T ldrh \rt, [\rn] |
|
| 164 |
-.endm |
|
| 165 |
- |
|
| 166 |
-.macro ldrh_post rt, rn, rm |
|
| 167 |
-A ldrh \rt, [\rn], \rm |
|
| 168 |
-T ldrh \rt, [\rn] |
|
| 169 |
-T add \rn, \rn, \rm |
|
| 170 |
-.endm |
|
| 171 |
- |
|
| 172 |
-.macro ldrb_post rt, rn, rm |
|
| 173 |
-A ldrb \rt, [\rn], \rm |
|
| 174 |
-T ldrb \rt, [\rn] |
|
| 175 |
-T add \rn, \rn, \rm |
|
| 176 |
-.endm |
|
| 177 |
- |
|
| 178 |
-.macro str_post rt, rn, rm:vararg |
|
| 179 |
-A str \rt, [\rn], \rm |
|
| 180 |
-T str \rt, [\rn] |
|
| 181 |
-T add \rn, \rn, \rm |
|
| 182 |
-.endm |
|
| 183 |
- |
|
| 184 |
-.macro strb_post rt, rn, rm:vararg |
|
| 185 |
-A strb \rt, [\rn], \rm |
|
| 186 |
-T strb \rt, [\rn] |
|
| 187 |
-T add \rn, \rn, \rm |
|
| 188 |
-.endm |
|
| 189 |
- |
|
| 190 |
-.macro strd_post rt, rt2, rn, rm |
|
| 191 |
-A strd \rt, \rt2, [\rn], \rm |
|
| 192 |
-T strd \rt, \rt2, [\rn] |
|
| 193 |
-T add \rn, \rn, \rm |
|
| 194 |
-.endm |
|
| 195 |
- |
|
| 196 |
-.macro strh_pre rt, rn, rm |
|
| 197 |
-A strh \rt, [\rn, \rm]! |
|
| 198 |
-T add \rn, \rn, \rm |
|
| 199 |
-T strh \rt, [\rn] |
|
| 200 |
-.endm |
|
| 201 |
- |
|
| 202 |
-.macro strh_dpre rt, rn, rm |
|
| 203 |
-A strh \rt, [\rn, -\rm]! |
|
| 204 |
-T sub \rn, \rn, \rm |
|
| 205 |
-T strh \rt, [\rn] |
|
| 206 |
-.endm |
|
| 207 |
- |
|
| 208 |
-.macro strh_post rt, rn, rm |
|
| 209 |
-A strh \rt, [\rn], \rm |
|
| 210 |
-T strh \rt, [\rn] |
|
| 211 |
-T add \rn, \rn, \rm |
|
| 212 |
-.endm |
|
| 213 |
- |
|
| 214 |
-.macro strh_dpost rt, rn, rm |
|
| 215 |
-A strh \rt, [\rn], -\rm |
|
| 216 |
-T strh \rt, [\rn] |
|
| 217 |
-T sub \rn, \rn, \rm |
|
| 218 |
-.endm |
|
| 219 |
- |
|
| 220 |
-#if HAVE_VFP_ARGS |
|
| 221 |
- .eabi_attribute 28, 1 |
|
| 222 |
-# define VFP |
|
| 223 |
-# define NOVFP @ |
|
| 224 |
-#else |
|
| 225 |
-# define VFP @ |
|
| 226 |
-# define NOVFP |
|
| 227 |
-#endif |
|
| 228 |
- |
|
| 229 |
-#define GLUE(a, b) a ## b |
|
| 230 |
-#define JOIN(a, b) GLUE(a, b) |
|
| 231 |
-#define X(s) JOIN(EXTERN_ASM, s) |
| ... | ... |
@@ -150,7 +150,6 @@ void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); |
| 150 | 150 |
void ff_vp3_v_loop_filter_neon(uint8_t *, int, int *); |
| 151 | 151 |
void ff_vp3_h_loop_filter_neon(uint8_t *, int, int *); |
| 152 | 152 |
|
| 153 |
-void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len); |
|
| 154 | 153 |
void ff_vector_fmul_window_neon(float *dst, const float *src0, |
| 155 | 154 |
const float *src1, const float *win, int len); |
| 156 | 155 |
void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul, |
| ... | ... |
@@ -328,7 +327,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) |
| 328 | 328 |
c->vp3_idct_dc_add = ff_vp3_idct_dc_add_neon; |
| 329 | 329 |
} |
| 330 | 330 |
|
| 331 |
- c->vector_fmul = ff_vector_fmul_neon; |
|
| 332 | 331 |
c->vector_fmul_window = ff_vector_fmul_window_neon; |
| 333 | 332 |
c->vector_fmul_scalar = ff_vector_fmul_scalar_neon; |
| 334 | 333 |
c->vector_fmac_scalar = ff_vector_fmac_scalar_neon; |
| ... | ... |
@@ -18,20 +18,13 @@ |
| 18 | 18 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 19 | 19 |
*/ |
| 20 | 20 |
|
| 21 |
-#include "libavutil/arm/cpu.h" |
|
| 22 | 21 |
#include "libavcodec/dsputil.h" |
| 23 | 22 |
#include "dsputil_arm.h" |
| 24 | 23 |
|
| 25 |
-void ff_vector_fmul_vfp(float *dst, const float *src0, |
|
| 26 |
- const float *src1, int len); |
|
| 27 | 24 |
void ff_vector_fmul_reverse_vfp(float *dst, const float *src0, |
| 28 | 25 |
const float *src1, int len); |
| 29 | 26 |
|
| 30 | 27 |
void ff_dsputil_init_vfp(DSPContext* c, AVCodecContext *avctx) |
| 31 | 28 |
{
|
| 32 |
- int cpu_flags = av_get_cpu_flags(); |
|
| 33 |
- |
|
| 34 |
- if (!have_vfpv3(cpu_flags)) |
|
| 35 |
- c->vector_fmul = ff_vector_fmul_vfp; |
|
| 36 | 29 |
c->vector_fmul_reverse = ff_vector_fmul_reverse_vfp; |
| 37 | 30 |
} |
| ... | ... |
@@ -20,7 +20,7 @@ |
| 20 | 20 |
*/ |
| 21 | 21 |
|
| 22 | 22 |
#include "config.h" |
| 23 |
-#include "asm.S" |
|
| 23 |
+#include "libavutil/arm/asm.S" |
|
| 24 | 24 |
|
| 25 | 25 |
preserve8 |
| 26 | 26 |
|
| ... | ... |
@@ -534,45 +534,6 @@ function ff_add_pixels_clamped_neon, export=1 |
| 534 | 534 |
bx lr |
| 535 | 535 |
endfunc |
| 536 | 536 |
|
| 537 |
-function ff_vector_fmul_neon, export=1 |
|
| 538 |
- subs r3, r3, #8 |
|
| 539 |
- vld1.32 {d0-d3}, [r1,:128]!
|
|
| 540 |
- vld1.32 {d4-d7}, [r2,:128]!
|
|
| 541 |
- vmul.f32 q8, q0, q2 |
|
| 542 |
- vmul.f32 q9, q1, q3 |
|
| 543 |
- beq 3f |
|
| 544 |
- bics ip, r3, #15 |
|
| 545 |
- beq 2f |
|
| 546 |
-1: subs ip, ip, #16 |
|
| 547 |
- vld1.32 {d0-d1}, [r1,:128]!
|
|
| 548 |
- vld1.32 {d4-d5}, [r2,:128]!
|
|
| 549 |
- vmul.f32 q10, q0, q2 |
|
| 550 |
- vld1.32 {d2-d3}, [r1,:128]!
|
|
| 551 |
- vld1.32 {d6-d7}, [r2,:128]!
|
|
| 552 |
- vmul.f32 q11, q1, q3 |
|
| 553 |
- vst1.32 {d16-d19},[r0,:128]!
|
|
| 554 |
- vld1.32 {d0-d1}, [r1,:128]!
|
|
| 555 |
- vld1.32 {d4-d5}, [r2,:128]!
|
|
| 556 |
- vmul.f32 q8, q0, q2 |
|
| 557 |
- vld1.32 {d2-d3}, [r1,:128]!
|
|
| 558 |
- vld1.32 {d6-d7}, [r2,:128]!
|
|
| 559 |
- vmul.f32 q9, q1, q3 |
|
| 560 |
- vst1.32 {d20-d23},[r0,:128]!
|
|
| 561 |
- bne 1b |
|
| 562 |
- ands r3, r3, #15 |
|
| 563 |
- beq 3f |
|
| 564 |
-2: vld1.32 {d0-d1}, [r1,:128]!
|
|
| 565 |
- vld1.32 {d4-d5}, [r2,:128]!
|
|
| 566 |
- vst1.32 {d16-d17},[r0,:128]!
|
|
| 567 |
- vmul.f32 q8, q0, q2 |
|
| 568 |
- vld1.32 {d2-d3}, [r1,:128]!
|
|
| 569 |
- vld1.32 {d6-d7}, [r2,:128]!
|
|
| 570 |
- vst1.32 {d18-d19},[r0,:128]!
|
|
| 571 |
- vmul.f32 q9, q1, q3 |
|
| 572 |
-3: vst1.32 {d16-d19},[r0,:128]!
|
|
| 573 |
- bx lr |
|
| 574 |
-endfunc |
|
| 575 |
- |
|
| 576 | 537 |
function ff_vector_fmul_window_neon, export=1 |
| 577 | 538 |
push {r4,r5,lr}
|
| 578 | 539 |
ldr lr, [sp, #12] |
| ... | ... |
@@ -19,7 +19,7 @@ |
| 19 | 19 |
*/ |
| 20 | 20 |
|
| 21 | 21 |
#include "config.h" |
| 22 |
-#include "asm.S" |
|
| 22 |
+#include "libavutil/arm/asm.S" |
|
| 23 | 23 |
|
| 24 | 24 |
/* |
| 25 | 25 |
* VFP is a floating point coprocessor used in some ARM cores. VFP11 has 1 cycle |
| ... | ... |
@@ -37,53 +37,6 @@ |
| 37 | 37 |
*/ |
| 38 | 38 |
|
| 39 | 39 |
/** |
| 40 |
- * ARM VFP optimized implementation of 'vector_fmul_c' function. |
|
| 41 |
- * Assume that len is a positive number and is multiple of 8 |
|
| 42 |
- */ |
|
| 43 |
-@ void ff_vector_fmul_vfp(float *dst, const float *src0, const float *src1, int len) |
|
| 44 |
-function ff_vector_fmul_vfp, export=1 |
|
| 45 |
- vpush {d8-d15}
|
|
| 46 |
- fmrx r12, fpscr |
|
| 47 |
- orr r12, r12, #(3 << 16) /* set vector size to 4 */ |
|
| 48 |
- fmxr fpscr, r12 |
|
| 49 |
- |
|
| 50 |
- vldmia r1!, {s0-s3}
|
|
| 51 |
- vldmia r2!, {s8-s11}
|
|
| 52 |
- vldmia r1!, {s4-s7}
|
|
| 53 |
- vldmia r2!, {s12-s15}
|
|
| 54 |
- vmul.f32 s8, s0, s8 |
|
| 55 |
-1: |
|
| 56 |
- subs r3, r3, #16 |
|
| 57 |
- vmul.f32 s12, s4, s12 |
|
| 58 |
- itttt ge |
|
| 59 |
- vldmiage r1!, {s16-s19}
|
|
| 60 |
- vldmiage r2!, {s24-s27}
|
|
| 61 |
- vldmiage r1!, {s20-s23}
|
|
| 62 |
- vldmiage r2!, {s28-s31}
|
|
| 63 |
- it ge |
|
| 64 |
- vmulge.f32 s24, s16, s24 |
|
| 65 |
- vstmia r0!, {s8-s11}
|
|
| 66 |
- vstmia r0!, {s12-s15}
|
|
| 67 |
- it ge |
|
| 68 |
- vmulge.f32 s28, s20, s28 |
|
| 69 |
- itttt gt |
|
| 70 |
- vldmiagt r1!, {s0-s3}
|
|
| 71 |
- vldmiagt r2!, {s8-s11}
|
|
| 72 |
- vldmiagt r1!, {s4-s7}
|
|
| 73 |
- vldmiagt r2!, {s12-s15}
|
|
| 74 |
- ittt ge |
|
| 75 |
- vmulge.f32 s8, s0, s8 |
|
| 76 |
- vstmiage r0!, {s24-s27}
|
|
| 77 |
- vstmiage r0!, {s28-s31}
|
|
| 78 |
- bgt 1b |
|
| 79 |
- |
|
| 80 |
- bic r12, r12, #(7 << 16) /* set vector size back to 1 */ |
|
| 81 |
- fmxr fpscr, r12 |
|
| 82 |
- vpop {d8-d15}
|
|
| 83 |
- bx lr |
|
| 84 |
-endfunc |
|
| 85 |
- |
|
| 86 |
-/** |
|
| 87 | 40 |
* ARM VFP optimized implementation of 'vector_fmul_reverse_c' function. |
| 88 | 41 |
* Assume that len is a positive number and is a multiple of 8 |
| 89 | 42 |
*/ |
| ... | ... |
@@ -18,7 +18,7 @@ |
| 18 | 18 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 19 | 19 |
*/ |
| 20 | 20 |
|
| 21 |
-#include "asm.S" |
|
| 21 |
+#include "libavutil/arm/asm.S" |
|
| 22 | 22 |
|
| 23 | 23 |
/* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */ |
| 24 | 24 |
.macro h264_chroma_mc8 type, codec=h264 |
| ... | ... |
@@ -23,7 +23,7 @@ |
| 23 | 23 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 24 | 24 |
*/ |
| 25 | 25 |
|
| 26 |
-#include "asm.S" |
|
| 26 |
+#include "libavutil/arm/asm.S" |
|
| 27 | 27 |
|
| 28 | 28 |
/* useful constants for the algorithm, they are saved in __constant_ptr__ at */ |
| 29 | 29 |
/* the end of the source code.*/ |
| ... | ... |
@@ -21,7 +21,7 @@ |
| 21 | 21 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 22 | 22 |
*/ |
| 23 | 23 |
|
| 24 |
-#include "asm.S" |
|
| 24 |
+#include "libavutil/arm/asm.S" |
|
| 25 | 25 |
|
| 26 | 26 |
#define W1 22725 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ |
| 27 | 27 |
#define W2 21407 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ |
| ... | ... |
@@ -21,7 +21,7 @@ |
| 21 | 21 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 22 | 22 |
*/ |
| 23 | 23 |
|
| 24 |
-#include "asm.S" |
|
| 24 |
+#include "libavutil/arm/asm.S" |
|
| 25 | 25 |
|
| 26 | 26 |
#define W1 22725 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ |
| 27 | 27 |
#define W2 21407 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ |
| ... | ... |
@@ -23,7 +23,7 @@ |
| 23 | 23 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 24 | 24 |
*/ |
| 25 | 25 |
|
| 26 |
-#include "asm.S" |
|
| 26 |
+#include "libavutil/arm/asm.S" |
|
| 27 | 27 |
|
| 28 | 28 |
#define W1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
| 29 | 29 |
#define W2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
| ... | ... |
@@ -36,9 +36,9 @@ |
| 36 | 36 |
#include <stddef.h> |
| 37 | 37 |
#include <stdio.h> |
| 38 | 38 |
|
| 39 |
+#include "libavutil/float_dsp.h" |
|
| 39 | 40 |
#include "avcodec.h" |
| 40 | 41 |
#include "get_bits.h" |
| 41 |
-#include "dsputil.h" |
|
| 42 | 42 |
#include "bytestream.h" |
| 43 | 43 |
#include "fft.h" |
| 44 | 44 |
#include "fmtconvert.h" |
| ... | ... |
@@ -125,13 +125,13 @@ typedef struct {
|
| 125 | 125 |
|
| 126 | 126 |
FFTContext mdct_ctx; |
| 127 | 127 |
FmtConvertContext fmt_conv; |
| 128 |
+ AVFloatDSPContext fdsp; |
|
| 128 | 129 |
} ATRAC3Context; |
| 129 | 130 |
|
| 130 | 131 |
static DECLARE_ALIGNED(32, float, mdct_window)[MDCT_SIZE]; |
| 131 | 132 |
static VLC spectral_coeff_tab[7]; |
| 132 | 133 |
static float gain_tab1[16]; |
| 133 | 134 |
static float gain_tab2[31]; |
| 134 |
-static DSPContext dsp; |
|
| 135 | 135 |
|
| 136 | 136 |
|
| 137 | 137 |
/** |
| ... | ... |
@@ -164,7 +164,7 @@ static void IMLT(ATRAC3Context *q, float *pInput, float *pOutput, int odd_band) |
| 164 | 164 |
q->mdct_ctx.imdct_calc(&q->mdct_ctx,pOutput,pInput); |
| 165 | 165 |
|
| 166 | 166 |
/* Perform windowing on the output. */ |
| 167 |
- dsp.vector_fmul(pOutput, pOutput, mdct_window, MDCT_SIZE); |
|
| 167 |
+ q->fdsp.vector_fmul(pOutput, pOutput, mdct_window, MDCT_SIZE); |
|
| 168 | 168 |
|
| 169 | 169 |
} |
| 170 | 170 |
|
| ... | ... |
@@ -1039,7 +1039,7 @@ static av_cold int atrac3_decode_init(AVCodecContext *avctx) |
| 1039 | 1039 |
q->matrix_coeff_index_next[i] = 3; |
| 1040 | 1040 |
} |
| 1041 | 1041 |
|
| 1042 |
- ff_dsputil_init(&dsp, avctx); |
|
| 1042 |
+ avpriv_float_dsp_init(&q->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); |
|
| 1043 | 1043 |
ff_fmt_convert_init(&q->fmt_conv, avctx); |
| 1044 | 1044 |
|
| 1045 | 1045 |
q->pUnits = av_mallocz(sizeof(channel_unit)*q->channels); |
| ... | ... |
@@ -2471,12 +2471,6 @@ WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) |
| 2471 | 2471 |
WRAPPER8_16_SQ(rd8x8_c, rd16_c) |
| 2472 | 2472 |
WRAPPER8_16_SQ(bit8x8_c, bit16_c) |
| 2473 | 2473 |
|
| 2474 |
-static void vector_fmul_c(float *dst, const float *src0, const float *src1, int len){
|
|
| 2475 |
- int i; |
|
| 2476 |
- for(i=0; i<len; i++) |
|
| 2477 |
- dst[i] = src0[i] * src1[i]; |
|
| 2478 |
-} |
|
| 2479 |
- |
|
| 2480 | 2474 |
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
|
| 2481 | 2475 |
int i; |
| 2482 | 2476 |
src1 += len-1; |
| ... | ... |
@@ -3054,7 +3048,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) |
| 3054 | 3054 |
#if CONFIG_AC3_DECODER |
| 3055 | 3055 |
c->ac3_downmix = ff_ac3_downmix_c; |
| 3056 | 3056 |
#endif |
| 3057 |
- c->vector_fmul = vector_fmul_c; |
|
| 3058 | 3057 |
c->vector_fmul_reverse = vector_fmul_reverse_c; |
| 3059 | 3058 |
c->vector_fmul_add = vector_fmul_add_c; |
| 3060 | 3059 |
c->vector_fmul_window = vector_fmul_window_c; |
| ... | ... |
@@ -403,7 +403,6 @@ typedef struct DSPContext {
|
| 403 | 403 |
void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize); |
| 404 | 404 |
void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len); |
| 405 | 405 |
/* assume len is a multiple of 16, and arrays are 32-byte aligned */ |
| 406 |
- void (*vector_fmul)(float *dst, const float *src0, const float *src1, int len); |
|
| 407 | 406 |
void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len); |
| 408 | 407 |
/* assume len is a multiple of 8, and src arrays are 16-byte aligned */ |
| 409 | 408 |
void (*vector_fmul_add)(float *dst, const float *src0, const float *src1, const float *src2, int len); |
| ... | ... |
@@ -35,6 +35,7 @@ |
| 35 | 35 |
* http://wiki.multimedia.cx/index.php?title=Nellymoser |
| 36 | 36 |
*/ |
| 37 | 37 |
|
| 38 |
+#include "libavutil/float_dsp.h" |
|
| 38 | 39 |
#include "libavutil/mathematics.h" |
| 39 | 40 |
#include "nellymoser.h" |
| 40 | 41 |
#include "avcodec.h" |
| ... | ... |
@@ -55,6 +56,7 @@ typedef struct NellyMoserEncodeContext {
|
| 55 | 55 |
AVCodecContext *avctx; |
| 56 | 56 |
int last_frame; |
| 57 | 57 |
DSPContext dsp; |
| 58 |
+ AVFloatDSPContext fdsp; |
|
| 58 | 59 |
FFTContext mdct_ctx; |
| 59 | 60 |
AudioFrameQueue afq; |
| 60 | 61 |
DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES]; |
| ... | ... |
@@ -120,11 +122,11 @@ static void apply_mdct(NellyMoserEncodeContext *s) |
| 120 | 120 |
float *in1 = s->buf + NELLY_BUF_LEN; |
| 121 | 121 |
float *in2 = s->buf + 2 * NELLY_BUF_LEN; |
| 122 | 122 |
|
| 123 |
- s->dsp.vector_fmul (s->in_buff, in0, ff_sine_128, NELLY_BUF_LEN); |
|
| 123 |
+ s->fdsp.vector_fmul (s->in_buff, in0, ff_sine_128, NELLY_BUF_LEN); |
|
| 124 | 124 |
s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN); |
| 125 | 125 |
s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff); |
| 126 | 126 |
|
| 127 |
- s->dsp.vector_fmul (s->in_buff, in1, ff_sine_128, NELLY_BUF_LEN); |
|
| 127 |
+ s->fdsp.vector_fmul (s->in_buff, in1, ff_sine_128, NELLY_BUF_LEN); |
|
| 128 | 128 |
s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN); |
| 129 | 129 |
s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->in_buff); |
| 130 | 130 |
} |
| ... | ... |
@@ -172,6 +174,7 @@ static av_cold int encode_init(AVCodecContext *avctx) |
| 172 | 172 |
if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0) |
| 173 | 173 |
goto error; |
| 174 | 174 |
ff_dsputil_init(&s->dsp, avctx); |
| 175 |
+ avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); |
|
| 175 | 176 |
|
| 176 | 177 |
/* Generate overlap window */ |
| 177 | 178 |
ff_init_ff_sine_windows(7); |
| ... | ... |
@@ -24,9 +24,9 @@ |
| 24 | 24 |
#if HAVE_ALTIVEC_H |
| 25 | 25 |
#include <altivec.h> |
| 26 | 26 |
#endif |
| 27 |
+#include "libavutil/ppc/types_altivec.h" |
|
| 28 |
+#include "libavutil/ppc/util_altivec.h" |
|
| 27 | 29 |
#include "libavcodec/dsputil.h" |
| 28 |
-#include "util_altivec.h" |
|
| 29 |
-#include "types_altivec.h" |
|
| 30 | 30 |
#include "dsputil_altivec.h" |
| 31 | 31 |
|
| 32 | 32 |
static int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
| ... | ... |
@@ -19,9 +19,10 @@ |
| 19 | 19 |
* License along with FFmpeg; if not, write to the Free Software |
| 20 | 20 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 21 | 21 |
*/ |
| 22 |
+ |
|
| 23 |
+#include "libavutil/ppc/types_altivec.h" |
|
| 24 |
+#include "libavutil/ppc/util_altivec.h" |
|
| 22 | 25 |
#include "libavcodec/fft.h" |
| 23 |
-#include "util_altivec.h" |
|
| 24 |
-#include "types_altivec.h" |
|
| 25 | 26 |
|
| 26 | 27 |
/** |
| 27 | 28 |
* Do a complex FFT with the parameters defined in ff_fft_init(). The |
| ... | ... |
@@ -18,25 +18,10 @@ |
| 18 | 18 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 19 | 19 |
*/ |
| 20 | 20 |
|
| 21 |
+#include "libavutil/ppc/util_altivec.h" |
|
| 21 | 22 |
#include "libavcodec/dsputil.h" |
| 22 | 23 |
|
| 23 | 24 |
#include "dsputil_altivec.h" |
| 24 |
-#include "util_altivec.h" |
|
| 25 |
- |
|
| 26 |
-static void vector_fmul_altivec(float *dst, const float *src0, const float *src1, int len) |
|
| 27 |
-{
|
|
| 28 |
- int i; |
|
| 29 |
- vector float d0, d1, s, zero = (vector float)vec_splat_u32(0); |
|
| 30 |
- for(i=0; i<len-7; i+=8) {
|
|
| 31 |
- d0 = vec_ld(0, src0+i); |
|
| 32 |
- s = vec_ld(0, src1+i); |
|
| 33 |
- d1 = vec_ld(16, src0+i); |
|
| 34 |
- d0 = vec_madd(d0, s, zero); |
|
| 35 |
- d1 = vec_madd(d1, vec_ld(16,src1+i), zero); |
|
| 36 |
- vec_st(d0, 0, dst+i); |
|
| 37 |
- vec_st(d1, 16, dst+i); |
|
| 38 |
- } |
|
| 39 |
-} |
|
| 40 | 25 |
|
| 41 | 26 |
static void vector_fmul_reverse_altivec(float *dst, const float *src0, |
| 42 | 27 |
const float *src1, int len) |
| ... | ... |
@@ -124,7 +109,6 @@ static void vector_fmul_window_altivec(float *dst, const float *src0, const floa |
| 124 | 124 |
|
| 125 | 125 |
void ff_float_init_altivec(DSPContext* c, AVCodecContext *avctx) |
| 126 | 126 |
{
|
| 127 |
- c->vector_fmul = vector_fmul_altivec; |
|
| 128 | 127 |
c->vector_fmul_reverse = vector_fmul_reverse_altivec; |
| 129 | 128 |
c->vector_fmul_add = vector_fmul_add_altivec; |
| 130 | 129 |
if(!(avctx->flags & CODEC_FLAG_BITEXACT)) {
|
| ... | ... |
@@ -20,8 +20,8 @@ |
| 20 | 20 |
|
| 21 | 21 |
#include "libavcodec/fmtconvert.h" |
| 22 | 22 |
|
| 23 |
+#include "libavutil/ppc/util_altivec.h" |
|
| 23 | 24 |
#include "dsputil_altivec.h" |
| 24 |
-#include "util_altivec.h" |
|
| 25 | 25 |
|
| 26 | 26 |
static void int32_to_float_fmul_scalar_altivec(float *dst, const int *src, float mul, int len) |
| 27 | 27 |
{
|
| ... | ... |
@@ -20,9 +20,9 @@ |
| 20 | 20 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 21 | 21 |
*/ |
| 22 | 22 |
|
| 23 |
+#include "libavutil/ppc/types_altivec.h" |
|
| 24 |
+#include "libavutil/ppc/util_altivec.h" |
|
| 23 | 25 |
#include "libavcodec/dsputil.h" |
| 24 |
-#include "util_altivec.h" |
|
| 25 |
-#include "types_altivec.h" |
|
| 26 | 26 |
#include "dsputil_altivec.h" |
| 27 | 27 |
|
| 28 | 28 |
/* |
| ... | ... |
@@ -19,13 +19,13 @@ |
| 19 | 19 |
*/ |
| 20 | 20 |
|
| 21 | 21 |
#include "libavutil/cpu.h" |
| 22 |
+#include "libavutil/ppc/types_altivec.h" |
|
| 23 |
+#include "libavutil/ppc/util_altivec.h" |
|
| 22 | 24 |
#include "libavcodec/dsputil.h" |
| 23 | 25 |
#include "libavcodec/h264data.h" |
| 24 | 26 |
#include "libavcodec/h264dsp.h" |
| 25 | 27 |
|
| 26 | 28 |
#include "dsputil_altivec.h" |
| 27 |
-#include "util_altivec.h" |
|
| 28 |
-#include "types_altivec.h" |
|
| 29 | 29 |
|
| 30 | 30 |
#define PUT_OP_U8_ALTIVEC(d, s, dst) d = s |
| 31 | 31 |
#define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s) |
| ... | ... |
@@ -28,12 +28,11 @@ |
| 28 | 28 |
#include <altivec.h> |
| 29 | 29 |
#endif |
| 30 | 30 |
|
| 31 |
+#include "libavutil/ppc/types_altivec.h" |
|
| 31 | 32 |
#include "libavcodec/dsputil.h" |
| 32 | 33 |
|
| 33 | 34 |
#include "dsputil_altivec.h" |
| 34 | 35 |
|
| 35 |
-#include "types_altivec.h" |
|
| 36 |
- |
|
| 37 | 36 |
static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2, |
| 38 | 37 |
int size) {
|
| 39 | 38 |
int i, size16; |
| ... | ... |
@@ -23,12 +23,13 @@ |
| 23 | 23 |
|
| 24 | 24 |
#include <stdlib.h> |
| 25 | 25 |
#include <stdio.h> |
| 26 |
+ |
|
| 26 | 27 |
#include "libavutil/cpu.h" |
| 28 |
+#include "libavutil/ppc/types_altivec.h" |
|
| 29 |
+#include "libavutil/ppc/util_altivec.h" |
|
| 27 | 30 |
#include "libavcodec/dsputil.h" |
| 28 | 31 |
#include "libavcodec/mpegvideo.h" |
| 29 | 32 |
|
| 30 |
-#include "util_altivec.h" |
|
| 31 |
-#include "types_altivec.h" |
|
| 32 | 33 |
#include "dsputil_altivec.h" |
| 33 | 34 |
|
| 34 | 35 |
/* AltiVec version of dct_unquantize_h263 |
| 35 | 36 |
deleted file mode 100644 |
| ... | ... |
@@ -1,47 +0,0 @@ |
| 1 |
-/* |
|
| 2 |
- * Copyright (c) 2006 Guillaume Poirier <gpoirier@mplayerhq.hu> |
|
| 3 |
- * |
|
| 4 |
- * This file is part of FFmpeg. |
|
| 5 |
- * |
|
| 6 |
- * FFmpeg is free software; you can redistribute it and/or |
|
| 7 |
- * modify it under the terms of the GNU Lesser General Public |
|
| 8 |
- * License as published by the Free Software Foundation; either |
|
| 9 |
- * version 2.1 of the License, or (at your option) any later version. |
|
| 10 |
- * |
|
| 11 |
- * FFmpeg is distributed in the hope that it will be useful, |
|
| 12 |
- * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 13 |
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 14 |
- * Lesser General Public License for more details. |
|
| 15 |
- * |
|
| 16 |
- * You should have received a copy of the GNU Lesser General Public |
|
| 17 |
- * License along with FFmpeg; if not, write to the Free Software |
|
| 18 |
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
| 19 |
- */ |
|
| 20 |
- |
|
| 21 |
-#ifndef AVCODEC_PPC_TYPES_ALTIVEC_H |
|
| 22 |
-#define AVCODEC_PPC_TYPES_ALTIVEC_H |
|
| 23 |
- |
|
| 24 |
-/*********************************************************************** |
|
| 25 |
- * Vector types |
|
| 26 |
- **********************************************************************/ |
|
| 27 |
-#define vec_u8 vector unsigned char |
|
| 28 |
-#define vec_s8 vector signed char |
|
| 29 |
-#define vec_u16 vector unsigned short |
|
| 30 |
-#define vec_s16 vector signed short |
|
| 31 |
-#define vec_u32 vector unsigned int |
|
| 32 |
-#define vec_s32 vector signed int |
|
| 33 |
-#define vec_f vector float |
|
| 34 |
- |
|
| 35 |
-/*********************************************************************** |
|
| 36 |
- * Null vector |
|
| 37 |
- **********************************************************************/ |
|
| 38 |
-#define LOAD_ZERO const vec_u8 zerov = vec_splat_u8( 0 ) |
|
| 39 |
- |
|
| 40 |
-#define zero_u8v (vec_u8) zerov |
|
| 41 |
-#define zero_s8v (vec_s8) zerov |
|
| 42 |
-#define zero_u16v (vec_u16) zerov |
|
| 43 |
-#define zero_s16v (vec_s16) zerov |
|
| 44 |
-#define zero_u32v (vec_u32) zerov |
|
| 45 |
-#define zero_s32v (vec_s32) zerov |
|
| 46 |
- |
|
| 47 |
-#endif /* AVCODEC_PPC_TYPES_ALTIVEC_H */ |
| 48 | 1 |
deleted file mode 100644 |
| ... | ... |
@@ -1,118 +0,0 @@ |
| 1 |
-/* |
|
| 2 |
- * This file is part of FFmpeg. |
|
| 3 |
- * |
|
| 4 |
- * FFmpeg is free software; you can redistribute it and/or |
|
| 5 |
- * modify it under the terms of the GNU Lesser General Public |
|
| 6 |
- * License as published by the Free Software Foundation; either |
|
| 7 |
- * version 2.1 of the License, or (at your option) any later version. |
|
| 8 |
- * |
|
| 9 |
- * FFmpeg is distributed in the hope that it will be useful, |
|
| 10 |
- * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 11 |
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 12 |
- * Lesser General Public License for more details. |
|
| 13 |
- * |
|
| 14 |
- * You should have received a copy of the GNU Lesser General Public |
|
| 15 |
- * License along with FFmpeg; if not, write to the Free Software |
|
| 16 |
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
| 17 |
- */ |
|
| 18 |
- |
|
| 19 |
-/** |
|
| 20 |
- * @file |
|
| 21 |
- * Contains misc utility macros and inline functions |
|
| 22 |
- */ |
|
| 23 |
- |
|
| 24 |
-#ifndef AVCODEC_PPC_UTIL_ALTIVEC_H |
|
| 25 |
-#define AVCODEC_PPC_UTIL_ALTIVEC_H |
|
| 26 |
- |
|
| 27 |
-#include <stdint.h> |
|
| 28 |
- |
|
| 29 |
-#include "config.h" |
|
| 30 |
- |
|
| 31 |
-#if HAVE_ALTIVEC_H |
|
| 32 |
-#include <altivec.h> |
|
| 33 |
-#endif |
|
| 34 |
- |
|
| 35 |
-#include "types_altivec.h" |
|
| 36 |
- |
|
| 37 |
-// used to build registers permutation vectors (vcprm) |
|
| 38 |
-// the 's' are for words in the _s_econd vector |
|
| 39 |
-#define WORD_0 0x00,0x01,0x02,0x03 |
|
| 40 |
-#define WORD_1 0x04,0x05,0x06,0x07 |
|
| 41 |
-#define WORD_2 0x08,0x09,0x0a,0x0b |
|
| 42 |
-#define WORD_3 0x0c,0x0d,0x0e,0x0f |
|
| 43 |
-#define WORD_s0 0x10,0x11,0x12,0x13 |
|
| 44 |
-#define WORD_s1 0x14,0x15,0x16,0x17 |
|
| 45 |
-#define WORD_s2 0x18,0x19,0x1a,0x1b |
|
| 46 |
-#define WORD_s3 0x1c,0x1d,0x1e,0x1f |
|
| 47 |
- |
|
| 48 |
-#define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d}
|
|
| 49 |
-#define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d}
|
|
| 50 |
- |
|
| 51 |
-// vcprmle is used to keep the same index as in the SSE version. |
|
| 52 |
-// it's the same as vcprm, with the index inversed |
|
| 53 |
-// ('le' is Little Endian)
|
|
| 54 |
-#define vcprmle(a,b,c,d) vcprm(d,c,b,a) |
|
| 55 |
- |
|
| 56 |
-// used to build inverse/identity vectors (vcii) |
|
| 57 |
-// n is _n_egative, p is _p_ositive |
|
| 58 |
-#define FLOAT_n -1. |
|
| 59 |
-#define FLOAT_p 1. |
|
| 60 |
- |
|
| 61 |
- |
|
| 62 |
-// Transpose 8x8 matrix of 16-bit elements (in-place) |
|
| 63 |
-#define TRANSPOSE8(a,b,c,d,e,f,g,h) \ |
|
| 64 |
-do { \
|
|
| 65 |
- vector signed short A1, B1, C1, D1, E1, F1, G1, H1; \ |
|
| 66 |
- vector signed short A2, B2, C2, D2, E2, F2, G2, H2; \ |
|
| 67 |
- \ |
|
| 68 |
- A1 = vec_mergeh (a, e); \ |
|
| 69 |
- B1 = vec_mergel (a, e); \ |
|
| 70 |
- C1 = vec_mergeh (b, f); \ |
|
| 71 |
- D1 = vec_mergel (b, f); \ |
|
| 72 |
- E1 = vec_mergeh (c, g); \ |
|
| 73 |
- F1 = vec_mergel (c, g); \ |
|
| 74 |
- G1 = vec_mergeh (d, h); \ |
|
| 75 |
- H1 = vec_mergel (d, h); \ |
|
| 76 |
- \ |
|
| 77 |
- A2 = vec_mergeh (A1, E1); \ |
|
| 78 |
- B2 = vec_mergel (A1, E1); \ |
|
| 79 |
- C2 = vec_mergeh (B1, F1); \ |
|
| 80 |
- D2 = vec_mergel (B1, F1); \ |
|
| 81 |
- E2 = vec_mergeh (C1, G1); \ |
|
| 82 |
- F2 = vec_mergel (C1, G1); \ |
|
| 83 |
- G2 = vec_mergeh (D1, H1); \ |
|
| 84 |
- H2 = vec_mergel (D1, H1); \ |
|
| 85 |
- \ |
|
| 86 |
- a = vec_mergeh (A2, E2); \ |
|
| 87 |
- b = vec_mergel (A2, E2); \ |
|
| 88 |
- c = vec_mergeh (B2, F2); \ |
|
| 89 |
- d = vec_mergel (B2, F2); \ |
|
| 90 |
- e = vec_mergeh (C2, G2); \ |
|
| 91 |
- f = vec_mergel (C2, G2); \ |
|
| 92 |
- g = vec_mergeh (D2, H2); \ |
|
| 93 |
- h = vec_mergel (D2, H2); \ |
|
| 94 |
-} while (0) |
|
| 95 |
- |
|
| 96 |
- |
|
| 97 |
-/** @brief loads unaligned vector @a *src with offset @a offset |
|
| 98 |
- and returns it */ |
|
| 99 |
-static inline vector unsigned char unaligned_load(int offset, uint8_t *src) |
|
| 100 |
-{
|
|
| 101 |
- register vector unsigned char first = vec_ld(offset, src); |
|
| 102 |
- register vector unsigned char second = vec_ld(offset+15, src); |
|
| 103 |
- register vector unsigned char mask = vec_lvsl(offset, src); |
|
| 104 |
- return vec_perm(first, second, mask); |
|
| 105 |
-} |
|
| 106 |
- |
|
| 107 |
-/** |
|
| 108 |
- * loads vector known misalignment |
|
| 109 |
- * @param perm_vec the align permute vector to combine the two loads from lvsl |
|
| 110 |
- */ |
|
| 111 |
-static inline vec_u8 load_with_perm_vec(int offset, uint8_t *src, vec_u8 perm_vec) |
|
| 112 |
-{
|
|
| 113 |
- vec_u8 a = vec_ld(offset, src); |
|
| 114 |
- vec_u8 b = vec_ld(offset+15, src); |
|
| 115 |
- return vec_perm(a, b, perm_vec); |
|
| 116 |
-} |
|
| 117 |
- |
|
| 118 |
-#endif /* AVCODEC_PPC_UTIL_ALTIVEC_H */ |
| ... | ... |
@@ -19,12 +19,11 @@ |
| 19 | 19 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 20 | 20 |
*/ |
| 21 | 21 |
|
| 22 |
+#include "libavutil/ppc/types_altivec.h" |
|
| 23 |
+#include "libavutil/ppc/util_altivec.h" |
|
| 22 | 24 |
#include "libavcodec/dsputil.h" |
| 23 | 25 |
#include "libavcodec/vc1dsp.h" |
| 24 | 26 |
|
| 25 |
-#include "util_altivec.h" |
|
| 26 |
-#include "dsputil_altivec.h" |
|
| 27 |
- |
|
| 28 | 27 |
// main steps of 8x8 transform |
| 29 | 28 |
#define STEP8(s0, s1, s2, s3, s4, s5, s6, s7, vec_rnd) \ |
| 30 | 29 |
do { \
|
| ... | ... |
@@ -18,9 +18,9 @@ |
| 18 | 18 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 19 | 19 |
*/ |
| 20 | 20 |
|
| 21 |
+#include "libavutil/ppc/types_altivec.h" |
|
| 22 |
+#include "libavutil/ppc/util_altivec.h" |
|
| 21 | 23 |
#include "libavcodec/dsputil.h" |
| 22 |
-#include "util_altivec.h" |
|
| 23 |
-#include "types_altivec.h" |
|
| 24 | 24 |
#include "dsputil_altivec.h" |
| 25 | 25 |
|
| 26 | 26 |
static const vec_s16 constants = |
| ... | ... |
@@ -21,10 +21,10 @@ |
| 21 | 21 |
*/ |
| 22 | 22 |
|
| 23 | 23 |
#include "libavutil/cpu.h" |
| 24 |
+#include "libavutil/ppc/types_altivec.h" |
|
| 25 |
+#include "libavutil/ppc/util_altivec.h" |
|
| 24 | 26 |
#include "libavcodec/vp8dsp.h" |
| 25 | 27 |
#include "dsputil_altivec.h" |
| 26 |
-#include "types_altivec.h" |
|
| 27 |
-#include "util_altivec.h" |
|
| 28 | 28 |
|
| 29 | 29 |
#define REPT4(...) { __VA_ARGS__, __VA_ARGS__, __VA_ARGS__, __VA_ARGS__ }
|
| 30 | 30 |
|
| ... | ... |
@@ -19,6 +19,7 @@ |
| 19 | 19 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 20 | 20 |
*/ |
| 21 | 21 |
|
| 22 |
+#include "libavutil/float_dsp.h" |
|
| 22 | 23 |
#include "avcodec.h" |
| 23 | 24 |
#define BITSTREAM_READER_LE |
| 24 | 25 |
#include "get_bits.h" |
| ... | ... |
@@ -26,7 +27,6 @@ |
| 26 | 26 |
#include "lpc.h" |
| 27 | 27 |
#include "celp_math.h" |
| 28 | 28 |
#include "celp_filters.h" |
| 29 |
-#include "dsputil.h" |
|
| 30 | 29 |
|
| 31 | 30 |
#define MAX_BACKWARD_FILTER_ORDER 36 |
| 32 | 31 |
#define MAX_BACKWARD_FILTER_LEN 40 |
| ... | ... |
@@ -38,6 +38,7 @@ |
| 38 | 38 |
typedef struct {
|
| 39 | 39 |
AVFrame frame; |
| 40 | 40 |
DSPContext dsp; |
| 41 |
+ AVFloatDSPContext fdsp; |
|
| 41 | 42 |
DECLARE_ALIGNED(32, float, sp_lpc)[FFALIGN(36, 16)]; ///< LPC coefficients for speech data (spec: A) |
| 42 | 43 |
DECLARE_ALIGNED(32, float, gain_lpc)[FFALIGN(10, 16)]; ///< LPC coefficients for gain (spec: GB) |
| 43 | 44 |
|
| ... | ... |
@@ -62,7 +63,7 @@ static av_cold int ra288_decode_init(AVCodecContext *avctx) |
| 62 | 62 |
{
|
| 63 | 63 |
RA288Context *ractx = avctx->priv_data; |
| 64 | 64 |
avctx->sample_fmt = AV_SAMPLE_FMT_FLT; |
| 65 |
- ff_dsputil_init(&ractx->dsp, avctx); |
|
| 65 |
+ avpriv_float_dsp_init(&ractx->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); |
|
| 66 | 66 |
|
| 67 | 67 |
avcodec_get_frame_defaults(&ractx->frame); |
| 68 | 68 |
avctx->coded_frame = &ractx->frame; |
| ... | ... |
@@ -137,7 +138,7 @@ static void do_hybrid_window(RA288Context *ractx, |
| 137 | 137 |
MAX_BACKWARD_FILTER_LEN + |
| 138 | 138 |
MAX_BACKWARD_FILTER_NONREC, 16)]); |
| 139 | 139 |
|
| 140 |
- ractx->dsp.vector_fmul(work, window, hist, FFALIGN(order + n + non_rec, 16)); |
|
| 140 |
+ ractx->fdsp.vector_fmul(work, window, hist, FFALIGN(order + n + non_rec, 16)); |
|
| 141 | 141 |
|
| 142 | 142 |
convolve(buffer1, work + order , n , order); |
| 143 | 143 |
convolve(buffer2, work + order + n, non_rec, order); |
| ... | ... |
@@ -164,7 +165,7 @@ static void backward_filter(RA288Context *ractx, |
| 164 | 164 |
do_hybrid_window(ractx, order, n, non_rec, temp, hist, rec, window); |
| 165 | 165 |
|
| 166 | 166 |
if (!compute_lpc_coefs(temp, order, lpc, 0, 1, 1)) |
| 167 |
- ractx->dsp.vector_fmul(lpc, lpc, tab, FFALIGN(order, 16)); |
|
| 167 |
+ ractx->fdsp.vector_fmul(lpc, lpc, tab, FFALIGN(order, 16)); |
|
| 168 | 168 |
|
| 169 | 169 |
memmove(hist, hist + n, move_size*sizeof(*hist)); |
| 170 | 170 |
} |
| ... | ... |
@@ -19,6 +19,7 @@ |
| 19 | 19 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 20 | 20 |
*/ |
| 21 | 21 |
|
| 22 |
+#include "libavutil/float_dsp.h" |
|
| 22 | 23 |
#include "avcodec.h" |
| 23 | 24 |
#include "get_bits.h" |
| 24 | 25 |
#include "dsputil.h" |
| ... | ... |
@@ -176,6 +177,7 @@ typedef struct TwinContext {
|
| 176 | 176 |
AVCodecContext *avctx; |
| 177 | 177 |
AVFrame frame; |
| 178 | 178 |
DSPContext dsp; |
| 179 |
+ AVFloatDSPContext fdsp; |
|
| 179 | 180 |
FFTContext mdct_ctx[3]; |
| 180 | 181 |
|
| 181 | 182 |
const ModeTab *mtab; |
| ... | ... |
@@ -787,8 +789,8 @@ static void read_and_decode_spectrum(TwinContext *tctx, GetBitContext *gb, |
| 787 | 787 |
dec_bark_env(tctx, bark1[i][j], bark_use_hist[i][j], i, |
| 788 | 788 |
tctx->tmp_buf, gain[sub*i+j], ftype); |
| 789 | 789 |
|
| 790 |
- tctx->dsp.vector_fmul(chunk + block_size*j, chunk + block_size*j, tctx->tmp_buf, |
|
| 791 |
- block_size); |
|
| 790 |
+ tctx->fdsp.vector_fmul(chunk + block_size*j, chunk + block_size*j, |
|
| 791 |
+ tctx->tmp_buf, block_size); |
|
| 792 | 792 |
|
| 793 | 793 |
} |
| 794 | 794 |
|
| ... | ... |
@@ -809,7 +811,7 @@ static void read_and_decode_spectrum(TwinContext *tctx, GetBitContext *gb, |
| 809 | 809 |
dec_lpc_spectrum_inv(tctx, lsp, ftype, tctx->tmp_buf); |
| 810 | 810 |
|
| 811 | 811 |
for (j = 0; j < mtab->fmode[ftype].sub; j++) {
|
| 812 |
- tctx->dsp.vector_fmul(chunk, chunk, tctx->tmp_buf, block_size); |
|
| 812 |
+ tctx->fdsp.vector_fmul(chunk, chunk, tctx->tmp_buf, block_size); |
|
| 813 | 813 |
chunk += block_size; |
| 814 | 814 |
} |
| 815 | 815 |
} |
| ... | ... |
@@ -1156,6 +1158,7 @@ static av_cold int twin_decode_init(AVCodecContext *avctx) |
| 1156 | 1156 |
} |
| 1157 | 1157 |
|
| 1158 | 1158 |
ff_dsputil_init(&tctx->dsp, avctx); |
| 1159 |
+ avpriv_float_dsp_init(&tctx->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); |
|
| 1159 | 1160 |
if ((ret = init_mdct_win(tctx))) {
|
| 1160 | 1161 |
av_log(avctx, AV_LOG_ERROR, "Error initializing MDCT\n"); |
| 1161 | 1162 |
twin_decode_close(avctx); |
| ... | ... |
@@ -561,7 +561,7 @@ static av_always_inline int vc1_mspel_filter(const uint8_t *src, int stride, int |
| 561 | 561 |
/** Function used to do motion compensation with bicubic interpolation |
| 562 | 562 |
*/ |
| 563 | 563 |
#define VC1_MSPEL_MC(OP, OPNAME)\ |
| 564 |
-static void OPNAME ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, int hmode, int vmode, int rnd)\ |
|
| 564 |
+static av_always_inline void OPNAME ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, int hmode, int vmode, int rnd)\ |
|
| 565 | 565 |
{\
|
| 566 | 566 |
int i, j;\ |
| 567 | 567 |
\ |
| ... | ... |
@@ -30,6 +30,7 @@ |
| 30 | 30 |
#include <math.h> |
| 31 | 31 |
|
| 32 | 32 |
#define BITSTREAM_READER_LE |
| 33 |
+#include "libavutil/float_dsp.h" |
|
| 33 | 34 |
#include "avcodec.h" |
| 34 | 35 |
#include "get_bits.h" |
| 35 | 36 |
#include "dsputil.h" |
| ... | ... |
@@ -128,6 +129,7 @@ typedef struct vorbis_context_s {
|
| 128 | 128 |
AVFrame frame; |
| 129 | 129 |
GetBitContext gb; |
| 130 | 130 |
DSPContext dsp; |
| 131 |
+ AVFloatDSPContext fdsp; |
|
| 131 | 132 |
FmtConvertContext fmt_conv; |
| 132 | 133 |
|
| 133 | 134 |
FFTContext mdct[2]; |
| ... | ... |
@@ -987,6 +989,7 @@ static av_cold int vorbis_decode_init(AVCodecContext *avccontext) |
| 987 | 987 |
|
| 988 | 988 |
vc->avccontext = avccontext; |
| 989 | 989 |
ff_dsputil_init(&vc->dsp, avccontext); |
| 990 |
+ avpriv_float_dsp_init(&vc->fdsp, avccontext->flags & CODEC_FLAG_BITEXACT); |
|
| 990 | 991 |
ff_fmt_convert_init(&vc->fmt_conv, avccontext); |
| 991 | 992 |
|
| 992 | 993 |
if (avccontext->request_sample_fmt == AV_SAMPLE_FMT_FLT) {
|
| ... | ... |
@@ -1609,7 +1612,7 @@ static int vorbis_parse_audio_packet(vorbis_context *vc) |
| 1609 | 1609 |
for (j = vc->audio_channels-1;j >= 0; j--) {
|
| 1610 | 1610 |
ch_floor_ptr = vc->channel_floors + j * blocksize / 2; |
| 1611 | 1611 |
ch_res_ptr = vc->channel_residues + res_chan[j] * blocksize / 2; |
| 1612 |
- vc->dsp.vector_fmul(ch_floor_ptr, ch_floor_ptr, ch_res_ptr, blocksize / 2); |
|
| 1612 |
+ vc->fdsp.vector_fmul(ch_floor_ptr, ch_floor_ptr, ch_res_ptr, blocksize / 2); |
|
| 1613 | 1613 |
mdct->imdct_half(mdct, ch_res_ptr, ch_floor_ptr); |
| 1614 | 1614 |
} |
| 1615 | 1615 |
|
| ... | ... |
@@ -2581,11 +2581,6 @@ int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, |
| 2581 | 2581 |
|
| 2582 | 2582 |
float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); |
| 2583 | 2583 |
|
| 2584 |
-void ff_vector_fmul_sse(float *dst, const float *src0, const float *src1, |
|
| 2585 |
- int len); |
|
| 2586 |
-void ff_vector_fmul_avx(float *dst, const float *src0, const float *src1, |
|
| 2587 |
- int len); |
|
| 2588 |
- |
|
| 2589 | 2584 |
void ff_vector_fmul_reverse_sse(float *dst, const float *src0, |
| 2590 | 2585 |
const float *src1, int len); |
| 2591 | 2586 |
void ff_vector_fmul_reverse_avx(float *dst, const float *src0, |
| ... | ... |
@@ -2915,7 +2910,6 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags) |
| 2915 | 2915 |
c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse; |
| 2916 | 2916 |
c->ac3_downmix = ac3_downmix_sse; |
| 2917 | 2917 |
#if HAVE_YASM |
| 2918 |
- c->vector_fmul = ff_vector_fmul_sse; |
|
| 2919 | 2918 |
c->vector_fmul_reverse = ff_vector_fmul_reverse_sse; |
| 2920 | 2919 |
c->vector_fmul_add = ff_vector_fmul_add_sse; |
| 2921 | 2920 |
#endif |
| ... | ... |
@@ -3077,7 +3071,6 @@ static void dsputil_init_avx(DSPContext *c, AVCodecContext *avctx, int mm_flags) |
| 3077 | 3077 |
} |
| 3078 | 3078 |
} |
| 3079 | 3079 |
c->butterflies_float_interleave = ff_butterflies_float_interleave_avx; |
| 3080 |
- c->vector_fmul = ff_vector_fmul_avx; |
|
| 3081 | 3080 |
c->vector_fmul_reverse = ff_vector_fmul_reverse_avx; |
| 3082 | 3081 |
c->vector_fmul_add = ff_vector_fmul_add_avx; |
| 3083 | 3082 |
#endif |
| ... | ... |
@@ -1130,38 +1130,6 @@ VECTOR_CLIP_INT32 6, 1, 0, 0 |
| 1130 | 1130 |
%endif |
| 1131 | 1131 |
|
| 1132 | 1132 |
;----------------------------------------------------------------------------- |
| 1133 |
-; void vector_fmul(float *dst, const float *src0, const float *src1, int len) |
|
| 1134 |
-;----------------------------------------------------------------------------- |
|
| 1135 |
-%macro VECTOR_FMUL 0 |
|
| 1136 |
-cglobal vector_fmul, 4,4,2, dst, src0, src1, len |
|
| 1137 |
- lea lenq, [lend*4 - 2*mmsize] |
|
| 1138 |
-ALIGN 16 |
|
| 1139 |
-.loop |
|
| 1140 |
- mova m0, [src0q + lenq] |
|
| 1141 |
- mova m1, [src0q + lenq + mmsize] |
|
| 1142 |
- mulps m0, m0, [src1q + lenq] |
|
| 1143 |
- mulps m1, m1, [src1q + lenq + mmsize] |
|
| 1144 |
- mova [dstq + lenq], m0 |
|
| 1145 |
- mova [dstq + lenq + mmsize], m1 |
|
| 1146 |
- |
|
| 1147 |
- sub lenq, 2*mmsize |
|
| 1148 |
- jge .loop |
|
| 1149 |
-%if mmsize == 32 |
|
| 1150 |
- vzeroupper |
|
| 1151 |
- RET |
|
| 1152 |
-%else |
|
| 1153 |
- REP_RET |
|
| 1154 |
-%endif |
|
| 1155 |
-%endmacro |
|
| 1156 |
- |
|
| 1157 |
-INIT_XMM sse |
|
| 1158 |
-VECTOR_FMUL |
|
| 1159 |
-%if HAVE_AVX |
|
| 1160 |
-INIT_YMM avx |
|
| 1161 |
-VECTOR_FMUL |
|
| 1162 |
-%endif |
|
| 1163 |
- |
|
| 1164 |
-;----------------------------------------------------------------------------- |
|
| 1165 | 1133 |
; void vector_fmul_reverse(float *dst, const float *src0, const float *src1, |
| 1166 | 1134 |
; int len) |
| 1167 | 1135 |
;----------------------------------------------------------------------------- |
| 2 | 9 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,231 @@ |
| 0 |
+/* |
|
| 1 |
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> |
|
| 2 |
+ * |
|
| 3 |
+ * This file is part of FFmpeg. |
|
| 4 |
+ * |
|
| 5 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
| 6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
| 7 |
+ * License as published by the Free Software Foundation; either |
|
| 8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
| 9 |
+ * |
|
| 10 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
| 11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 13 |
+ * Lesser General Public License for more details. |
|
| 14 |
+ * |
|
| 15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
| 16 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
| 17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
| 18 |
+ */ |
|
| 19 |
+ |
|
| 20 |
+#include "config.h" |
|
| 21 |
+ |
|
| 22 |
+#ifdef __ELF__ |
|
| 23 |
+# define ELF |
|
| 24 |
+#else |
|
| 25 |
+# define ELF @ |
|
| 26 |
+#endif |
|
| 27 |
+ |
|
| 28 |
+#if CONFIG_THUMB |
|
| 29 |
+# define A @ |
|
| 30 |
+# define T |
|
| 31 |
+#else |
|
| 32 |
+# define A |
|
| 33 |
+# define T @ |
|
| 34 |
+#endif |
|
| 35 |
+ |
|
| 36 |
+#if HAVE_NEON |
|
| 37 |
+ .arch armv7-a |
|
| 38 |
+#elif HAVE_ARMV6T2 |
|
| 39 |
+ .arch armv6t2 |
|
| 40 |
+#elif HAVE_ARMV6 |
|
| 41 |
+ .arch armv6 |
|
| 42 |
+#elif HAVE_ARMV5TE |
|
| 43 |
+ .arch armv5te |
|
| 44 |
+#endif |
|
| 45 |
+ |
|
| 46 |
+#if HAVE_NEON |
|
| 47 |
+ .fpu neon |
|
| 48 |
+#elif HAVE_ARMVFP |
|
| 49 |
+ .fpu vfp |
|
| 50 |
+#endif |
|
| 51 |
+ |
|
| 52 |
+ .syntax unified |
|
| 53 |
+T .thumb |
|
| 54 |
+ |
|
| 55 |
+.macro require8 val=1 |
|
| 56 |
+ELF .eabi_attribute 24, \val |
|
| 57 |
+.endm |
|
| 58 |
+ |
|
| 59 |
+.macro preserve8 val=1 |
|
| 60 |
+ELF .eabi_attribute 25, \val |
|
| 61 |
+.endm |
|
| 62 |
+ |
|
| 63 |
+.macro function name, export=0 |
|
| 64 |
+ .macro endfunc |
|
| 65 |
+ELF .size \name, . - \name |
|
| 66 |
+ .endfunc |
|
| 67 |
+ .purgem endfunc |
|
| 68 |
+ .endm |
|
| 69 |
+ .text |
|
| 70 |
+ .align 2 |
|
| 71 |
+ .if \export |
|
| 72 |
+ .global EXTERN_ASM\name |
|
| 73 |
+EXTERN_ASM\name: |
|
| 74 |
+ .endif |
|
| 75 |
+ELF .type \name, %function |
|
| 76 |
+ .func \name |
|
| 77 |
+\name: |
|
| 78 |
+.endm |
|
| 79 |
+ |
|
| 80 |
+.macro const name, align=2 |
|
| 81 |
+ .macro endconst |
|
| 82 |
+ELF .size \name, . - \name |
|
| 83 |
+ .purgem endconst |
|
| 84 |
+ .endm |
|
| 85 |
+ .section .rodata |
|
| 86 |
+ .align \align |
|
| 87 |
+\name: |
|
| 88 |
+.endm |
|
| 89 |
+ |
|
| 90 |
+#if !HAVE_ARMV6T2 |
|
| 91 |
+.macro movw rd, val |
|
| 92 |
+ mov \rd, \val & 255 |
|
| 93 |
+ orr \rd, \val & ~255 |
|
| 94 |
+.endm |
|
| 95 |
+#endif |
|
| 96 |
+ |
|
| 97 |
+.macro mov32 rd, val |
|
| 98 |
+#if HAVE_ARMV6T2 |
|
| 99 |
+ movw \rd, #(\val) & 0xffff |
|
| 100 |
+ .if (\val) >> 16 |
|
| 101 |
+ movt \rd, #(\val) >> 16 |
|
| 102 |
+ .endif |
|
| 103 |
+#else |
|
| 104 |
+ ldr \rd, =\val |
|
| 105 |
+#endif |
|
| 106 |
+.endm |
|
| 107 |
+ |
|
| 108 |
+.macro movrel rd, val |
|
| 109 |
+#if HAVE_ARMV6T2 && !CONFIG_PIC && !defined(__APPLE__) |
|
| 110 |
+ movw \rd, #:lower16:\val |
|
| 111 |
+ movt \rd, #:upper16:\val |
|
| 112 |
+#else |
|
| 113 |
+ ldr \rd, =\val |
|
| 114 |
+#endif |
|
| 115 |
+.endm |
|
| 116 |
+ |
|
| 117 |
+.macro ldr_pre rt, rn, rm:vararg |
|
| 118 |
+A ldr \rt, [\rn, \rm]! |
|
| 119 |
+T add \rn, \rn, \rm |
|
| 120 |
+T ldr \rt, [\rn] |
|
| 121 |
+.endm |
|
| 122 |
+ |
|
| 123 |
+.macro ldr_dpre rt, rn, rm:vararg |
|
| 124 |
+A ldr \rt, [\rn, -\rm]! |
|
| 125 |
+T sub \rn, \rn, \rm |
|
| 126 |
+T ldr \rt, [\rn] |
|
| 127 |
+.endm |
|
| 128 |
+ |
|
| 129 |
+.macro ldr_nreg rt, rn, rm:vararg |
|
| 130 |
+A ldr \rt, [\rn, -\rm] |
|
| 131 |
+T sub \rt, \rn, \rm |
|
| 132 |
+T ldr \rt, [\rt] |
|
| 133 |
+.endm |
|
| 134 |
+ |
|
| 135 |
+.macro ldr_post rt, rn, rm:vararg |
|
| 136 |
+A ldr \rt, [\rn], \rm |
|
| 137 |
+T ldr \rt, [\rn] |
|
| 138 |
+T add \rn, \rn, \rm |
|
| 139 |
+.endm |
|
| 140 |
+ |
|
| 141 |
+.macro ldrd_reg rt, rt2, rn, rm |
|
| 142 |
+A ldrd \rt, \rt2, [\rn, \rm] |
|
| 143 |
+T add \rt, \rn, \rm |
|
| 144 |
+T ldrd \rt, \rt2, [\rt] |
|
| 145 |
+.endm |
|
| 146 |
+ |
|
| 147 |
+.macro ldrd_post rt, rt2, rn, rm |
|
| 148 |
+A ldrd \rt, \rt2, [\rn], \rm |
|
| 149 |
+T ldrd \rt, \rt2, [\rn] |
|
| 150 |
+T add \rn, \rn, \rm |
|
| 151 |
+.endm |
|
| 152 |
+ |
|
| 153 |
+.macro ldrh_pre rt, rn, rm |
|
| 154 |
+A ldrh \rt, [\rn, \rm]! |
|
| 155 |
+T add \rn, \rn, \rm |
|
| 156 |
+T ldrh \rt, [\rn] |
|
| 157 |
+.endm |
|
| 158 |
+ |
|
| 159 |
+.macro ldrh_dpre rt, rn, rm |
|
| 160 |
+A ldrh \rt, [\rn, -\rm]! |
|
| 161 |
+T sub \rn, \rn, \rm |
|
| 162 |
+T ldrh \rt, [\rn] |
|
| 163 |
+.endm |
|
| 164 |
+ |
|
| 165 |
+.macro ldrh_post rt, rn, rm |
|
| 166 |
+A ldrh \rt, [\rn], \rm |
|
| 167 |
+T ldrh \rt, [\rn] |
|
| 168 |
+T add \rn, \rn, \rm |
|
| 169 |
+.endm |
|
| 170 |
+ |
|
| 171 |
+.macro ldrb_post rt, rn, rm |
|
| 172 |
+A ldrb \rt, [\rn], \rm |
|
| 173 |
+T ldrb \rt, [\rn] |
|
| 174 |
+T add \rn, \rn, \rm |
|
| 175 |
+.endm |
|
| 176 |
+ |
|
| 177 |
+.macro str_post rt, rn, rm:vararg |
|
| 178 |
+A str \rt, [\rn], \rm |
|
| 179 |
+T str \rt, [\rn] |
|
| 180 |
+T add \rn, \rn, \rm |
|
| 181 |
+.endm |
|
| 182 |
+ |
|
| 183 |
+.macro strb_post rt, rn, rm:vararg |
|
| 184 |
+A strb \rt, [\rn], \rm |
|
| 185 |
+T strb \rt, [\rn] |
|
| 186 |
+T add \rn, \rn, \rm |
|
| 187 |
+.endm |
|
| 188 |
+ |
|
| 189 |
+.macro strd_post rt, rt2, rn, rm |
|
| 190 |
+A strd \rt, \rt2, [\rn], \rm |
|
| 191 |
+T strd \rt, \rt2, [\rn] |
|
| 192 |
+T add \rn, \rn, \rm |
|
| 193 |
+.endm |
|
| 194 |
+ |
|
| 195 |
+.macro strh_pre rt, rn, rm |
|
| 196 |
+A strh \rt, [\rn, \rm]! |
|
| 197 |
+T add \rn, \rn, \rm |
|
| 198 |
+T strh \rt, [\rn] |
|
| 199 |
+.endm |
|
| 200 |
+ |
|
| 201 |
+.macro strh_dpre rt, rn, rm |
|
| 202 |
+A strh \rt, [\rn, -\rm]! |
|
| 203 |
+T sub \rn, \rn, \rm |
|
| 204 |
+T strh \rt, [\rn] |
|
| 205 |
+.endm |
|
| 206 |
+ |
|
| 207 |
+.macro strh_post rt, rn, rm |
|
| 208 |
+A strh \rt, [\rn], \rm |
|
| 209 |
+T strh \rt, [\rn] |
|
| 210 |
+T add \rn, \rn, \rm |
|
| 211 |
+.endm |
|
| 212 |
+ |
|
| 213 |
+.macro strh_dpost rt, rn, rm |
|
| 214 |
+A strh \rt, [\rn], -\rm |
|
| 215 |
+T strh \rt, [\rn] |
|
| 216 |
+T sub \rn, \rn, \rm |
|
| 217 |
+.endm |
|
| 218 |
+ |
|
| 219 |
+#if HAVE_VFP_ARGS |
|
| 220 |
+ .eabi_attribute 28, 1 |
|
| 221 |
+# define VFP |
|
| 222 |
+# define NOVFP @ |
|
| 223 |
+#else |
|
| 224 |
+# define VFP @ |
|
| 225 |
+# define NOVFP |
|
| 226 |
+#endif |
|
| 227 |
+ |
|
| 228 |
+#define GLUE(a, b) a ## b |
|
| 229 |
+#define JOIN(a, b) GLUE(a, b) |
|
| 230 |
+#define X(s) JOIN(EXTERN_ASM, s) |
| 0 | 231 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,29 @@ |
| 0 |
+/* |
|
| 1 |
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com> |
|
| 2 |
+ * |
|
| 3 |
+ * This file is part of FFmpeg. |
|
| 4 |
+ * |
|
| 5 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
| 6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
| 7 |
+ * License as published by the Free Software Foundation; either |
|
| 8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
| 9 |
+ * |
|
| 10 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
| 11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 13 |
+ * Lesser General Public License for more details. |
|
| 14 |
+ * |
|
| 15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
| 16 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
| 17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
| 18 |
+ */ |
|
| 19 |
+ |
|
| 20 |
+#ifndef AVUTIL_ARM_FLOAT_DSP_ARM_H |
|
| 21 |
+#define AVUTIL_ARM_FLOAT_DSP_ARM_H |
|
| 22 |
+ |
|
| 23 |
+#include "libavutil/float_dsp.h" |
|
| 24 |
+ |
|
| 25 |
+void ff_float_dsp_init_vfp (AVFloatDSPContext *fdsp); |
|
| 26 |
+void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp); |
|
| 27 |
+ |
|
| 28 |
+#endif /* AVUTIL_ARM_FLOAT_DSP_ARM_H */ |
| 0 | 29 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,33 @@ |
| 0 |
+/* |
|
| 1 |
+ * ARM optimized DSP utils |
|
| 2 |
+ * |
|
| 3 |
+ * This file is part of FFmpeg. |
|
| 4 |
+ * |
|
| 5 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
| 6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
| 7 |
+ * License as published by the Free Software Foundation; either |
|
| 8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
| 9 |
+ * |
|
| 10 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
| 11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 13 |
+ * Lesser General Public License for more details. |
|
| 14 |
+ * |
|
| 15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
| 16 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
| 17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
| 18 |
+ */ |
|
| 19 |
+ |
|
| 20 |
+#include "libavutil/arm/cpu.h" |
|
| 21 |
+#include "libavutil/float_dsp.h" |
|
| 22 |
+#include "float_dsp_arm.h" |
|
| 23 |
+ |
|
| 24 |
/* ARM entry point for float DSP setup: query the CPU flags once and let the
 * VFP and then the NEON initialiser fill *fdsp.  NEON runs last, so on a CPU
 * reporting both, the NEON routines are the ones left installed. */
+void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp) |
|
| 25 |
+{
|
|
| 26 |
+ int cpu_flags = av_get_cpu_flags(); |
|
| 27 |
+ |
|
| 28 |
+ if (have_vfp(cpu_flags)) |
|
| 29 |
+ ff_float_dsp_init_vfp(fdsp); |
|
| 30 |
+ if (have_neon(cpu_flags)) |
|
| 31 |
+ ff_float_dsp_init_neon(fdsp); |
|
| 32 |
+} |
| 0 | 33 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,32 @@ |
| 0 |
+/* |
|
| 1 |
+ * ARM NEON optimised Float DSP functions |
|
| 2 |
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> |
|
| 3 |
+ * |
|
| 4 |
+ * This file is part of Libav. |
|
| 5 |
+ * |
|
| 6 |
+ * Libav is free software; you can redistribute it and/or |
|
| 7 |
+ * modify it under the terms of the GNU Lesser General Public |
|
| 8 |
+ * License as published by the Free Software Foundation; either |
|
| 9 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
| 10 |
+ * |
|
| 11 |
+ * Libav is distributed in the hope that it will be useful, |
|
| 12 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 13 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 14 |
+ * Lesser General Public License for more details. |
|
| 15 |
+ * |
|
| 16 |
+ * You should have received a copy of the GNU Lesser General Public |
|
| 17 |
+ * License along with Libav; if not, write to the Free Software |
|
| 18 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
| 19 |
+ */ |
|
| 20 |
+ |
|
| 21 |
+#include <stdint.h> |
|
| 22 |
+ |
|
| 23 |
+#include "libavutil/float_dsp.h" |
|
| 24 |
+#include "float_dsp_arm.h" |
|
| 25 |
+ |
|
| 26 |
/* NEON implementation of AVFloatDSPContext.vector_fmul; only declared here. */
+void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len); |
|
| 27 |
+ |
|
| 28 |
/* Install the NEON routines into *fdsp.  No CPU-capability test is made
 * here; the caller is expected to have done it. */
+void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp) |
|
| 29 |
+{
|
|
| 30 |
+ fdsp->vector_fmul = ff_vector_fmul_neon; |
|
| 31 |
+} |
| 0 | 32 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,34 @@ |
| 0 |
+/* |
|
| 1 |
+ * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net> |
|
| 2 |
+ * |
|
| 3 |
+ * This file is part of FFmpeg. |
|
| 4 |
+ * |
|
| 5 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
| 6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
| 7 |
+ * License as published by the Free Software Foundation; either |
|
| 8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
| 9 |
+ * |
|
| 10 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
| 11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 13 |
+ * Lesser General Public License for more details. |
|
| 14 |
+ * |
|
| 15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
| 16 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
| 17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
| 18 |
+ */ |
|
| 19 |
+ |
|
| 20 |
+#include "libavutil/arm/cpu.h" |
|
| 21 |
+#include "libavutil/float_dsp.h" |
|
| 22 |
+#include "float_dsp_arm.h" |
|
| 23 |
+ |
|
| 24 |
/* VFP implementation of AVFloatDSPContext.vector_fmul; only declared here. */
+void ff_vector_fmul_vfp(float *dst, const float *src0, const float *src1, |
|
| 25 |
+ int len); |
|
| 26 |
+ |
|
| 27 |
/* Install the VFP routine, but only on CPUs that do NOT report VFPv3;
 * otherwise *fdsp is left as it was.
 * NOTE(review): presumably VFPv3 parts are excluded because the routine
 * depends on VFP short-vector mode -- confirm the intent. */
+void ff_float_dsp_init_vfp(AVFloatDSPContext *fdsp) |
|
| 28 |
+{
|
|
| 29 |
+ int cpu_flags = av_get_cpu_flags(); |
|
| 30 |
+ |
|
| 31 |
+ if (!have_vfpv3(cpu_flags)) |
|
| 32 |
+ fdsp->vector_fmul = ff_vector_fmul_vfp; |
|
| 33 |
+} |
| 0 | 34 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,64 @@ |
| 0 |
+/* |
|
| 1 |
+ * ARM NEON optimised Float DSP functions |
|
| 2 |
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> |
|
| 3 |
+ * |
|
| 4 |
+ * This file is part of Libav. |
|
| 5 |
+ * |
|
| 6 |
+ * Libav is free software; you can redistribute it and/or |
|
| 7 |
+ * modify it under the terms of the GNU Lesser General Public |
|
| 8 |
+ * License as published by the Free Software Foundation; either |
|
| 9 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
| 10 |
+ * |
|
| 11 |
+ * Libav is distributed in the hope that it will be useful, |
|
| 12 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 13 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 14 |
+ * Lesser General Public License for more details. |
|
| 15 |
+ * |
|
| 16 |
+ * You should have received a copy of the GNU Lesser General Public |
|
| 17 |
+ * License along with Libav; if not, write to the Free Software |
|
| 18 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
| 19 |
+ */ |
|
| 20 |
+ |
|
| 21 |
+#include "config.h" |
|
| 22 |
+#include "asm.S" |
|
| 23 |
+ |
|
| 24 |
+ preserve8 |
|
| 25 |
+ |
|
| 26 |
@ void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len)
@ Element-wise product dst[i] = src0[i] * src1[i], with r0 = dst, r1 = src0,
@ r2 = src1 and r3 = len.  Eight products are computed before len is tested,
@ and the remainder is consumed 16 at a time (loop 1:) plus at most one
@ group of 8 (block 2:), so len has to be a positive multiple of 8.
@ Every load and store carries a :128 alignment qualifier.
@ Products are kept in q8/q9 (or q10/q11) and stored one step after they are
@ computed, so each store overlaps the next loads and multiplies.
+function ff_vector_fmul_neon, export=1 |
|
| 27 |
+ subs r3, r3, #8 |
|
| 28 |
+ vld1.32 {d0-d3}, [r1,:128]!
|
|
| 29 |
+ vld1.32 {d4-d7}, [r2,:128]!
|
|
| 30 |
+ vmul.f32 q8, q0, q2 |
|
| 31 |
+ vmul.f32 q9, q1, q3 |
|
| 32 |
@ flags are still those of the subs above: len was exactly 8
+ beq 3f |
|
| 33 |
@ ip = remaining count rounded down to a multiple of 16; none -> block 2:
+ bics ip, r3, #15 |
|
| 34 |
+ beq 2f |
|
| 35 |
@ main loop: store the 8 pending products and compute 16 new ones per pass
+1: subs ip, ip, #16 |
|
| 36 |
+ vld1.32 {d0-d1}, [r1,:128]!
|
|
| 37 |
+ vld1.32 {d4-d5}, [r2,:128]!
|
|
| 38 |
+ vmul.f32 q10, q0, q2 |
|
| 39 |
+ vld1.32 {d2-d3}, [r1,:128]!
|
|
| 40 |
+ vld1.32 {d6-d7}, [r2,:128]!
|
|
| 41 |
+ vmul.f32 q11, q1, q3 |
|
| 42 |
+ vst1.32 {d16-d19},[r0,:128]!
|
|
| 43 |
+ vld1.32 {d0-d1}, [r1,:128]!
|
|
| 44 |
+ vld1.32 {d4-d5}, [r2,:128]!
|
|
| 45 |
+ vmul.f32 q8, q0, q2 |
|
| 46 |
+ vld1.32 {d2-d3}, [r1,:128]!
|
|
| 47 |
+ vld1.32 {d6-d7}, [r2,:128]!
|
|
| 48 |
+ vmul.f32 q9, q1, q3 |
|
| 49 |
+ vst1.32 {d20-d23},[r0,:128]!
|
|
| 50 |
+ bne 1b |
|
| 51 |
@ leftover after the 16-element passes; none -> final store at 3:
+ ands r3, r3, #15 |
|
| 52 |
+ beq 3f |
|
| 53 |
@ trailing group of 8: store the pending products while computing the last ones
+2: vld1.32 {d0-d1}, [r1,:128]!
|
|
| 54 |
+ vld1.32 {d4-d5}, [r2,:128]!
|
|
| 55 |
+ vst1.32 {d16-d17},[r0,:128]!
|
|
| 56 |
+ vmul.f32 q8, q0, q2 |
|
| 57 |
+ vld1.32 {d2-d3}, [r1,:128]!
|
|
| 58 |
+ vld1.32 {d6-d7}, [r2,:128]!
|
|
| 59 |
+ vst1.32 {d18-d19},[r0,:128]!
|
|
| 60 |
+ vmul.f32 q9, q1, q3 |
|
| 61 |
@ store the last 8 products still held in q8/q9
+3: vst1.32 {d16-d19},[r0,:128]!
|
|
| 62 |
+ bx lr |
|
| 63 |
+endfunc |
| 0 | 64 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,68 @@ |
| 0 |
+/* |
|
| 1 |
+ * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net> |
|
| 2 |
+ * |
|
| 3 |
+ * This file is part of FFmpeg. |
|
| 4 |
+ * |
|
| 5 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
| 6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
| 7 |
+ * License as published by the Free Software Foundation; either |
|
| 8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
| 9 |
+ * |
|
| 10 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
| 11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 13 |
+ * Lesser General Public License for more details. |
|
| 14 |
+ * |
|
| 15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
| 16 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
| 17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
| 18 |
+ */ |
|
| 19 |
+ |
|
| 20 |
+#include "config.h" |
|
| 21 |
+#include "asm.S" |
|
| 22 |
+ |
|
| 23 |
+/** |
|
| 24 |
+ * Assume that len is a positive number and is a multiple of 8. |
|
| 25 |
+ */ |
|
| 26 |
+@ void ff_vector_fmul_vfp(float *dst, const float *src0, const float *src1, int len) |
|
| 27 |
@ Element-wise product dst[i] = src0[i] * src1[i], with r0 = dst, r1 = src0,
@ r2 = src1 and r3 = len.  The FPSCR vector length is raised to 4 so that each
@ vmul.f32 below multiplies four consecutive s registers; the length field
@ is cleared again before returning.  Every pass of loop 1: stores 8 products
@ from s8-s15 and, when at least 16 elements were still left, 8 more from
@ s24-s31.  d8-d15 (the registers holding s16-s31) are pushed on entry and
@ popped on exit.
+function ff_vector_fmul_vfp, export=1 |
|
| 28 |
+ vpush {d8-d15}
|
|
| 29 |
+ fmrx r12, fpscr |
|
| 30 |
+ orr r12, r12, #(3 << 16) /* set vector size to 4 */ |
|
| 31 |
+ fmxr fpscr, r12 |
|
| 32 |
+ |
|
| 33 |
@ preload the first group of 8 from both inputs and start multiplying it
+ vldmia r1!, {s0-s3}
|
|
| 34 |
+ vldmia r2!, {s8-s11}
|
|
| 35 |
+ vldmia r1!, {s4-s7}
|
|
| 36 |
+ vldmia r2!, {s12-s15}
|
|
| 37 |
+ vmul.f32 s8, s0, s8 |
|
| 38 |
+1: |
|
| 39 |
@ after this subs: ge (r3 >= 0) means a second group of 8 exists in this
@ pass, gt (r3 > 0) means further groups follow it
+ subs r3, r3, #16 |
|
| 40 |
+ vmul.f32 s12, s4, s12 |
|
| 41 |
+ itttt ge |
|
| 42 |
+ vldmiage r1!, {s16-s19}
|
|
| 43 |
+ vldmiage r2!, {s24-s27}
|
|
| 44 |
+ vldmiage r1!, {s20-s23}
|
|
| 45 |
+ vldmiage r2!, {s28-s31}
|
|
| 46 |
+ it ge |
|
| 47 |
+ vmulge.f32 s24, s16, s24 |
|
| 48 |
+ vstmia r0!, {s8-s11}
|
|
| 49 |
+ vstmia r0!, {s12-s15}
|
|
| 50 |
+ it ge |
|
| 51 |
+ vmulge.f32 s28, s20, s28 |
|
| 52 |
@ reload s0-s15 with the group for the next pass, if there is one
+ itttt gt |
|
| 53 |
+ vldmiagt r1!, {s0-s3}
|
|
| 54 |
+ vldmiagt r2!, {s8-s11}
|
|
| 55 |
+ vldmiagt r1!, {s4-s7}
|
|
| 56 |
+ vldmiagt r2!, {s12-s15}
|
|
| 57 |
+ ittt ge |
|
| 58 |
+ vmulge.f32 s8, s0, s8 |
|
| 59 |
+ vstmiage r0!, {s24-s27}
|
|
| 60 |
+ vstmiage r0!, {s28-s31}
|
|
| 61 |
+ bgt 1b |
|
| 62 |
+ |
|
| 63 |
+ bic r12, r12, #(7 << 16) /* set vector size back to 1 */ |
|
| 64 |
+ fmxr fpscr, r12 |
|
| 65 |
+ vpop {d8-d15}
|
|
| 66 |
+ bx lr |
|
| 67 |
+endfunc |
| 0 | 68 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,42 @@ |
| 0 |
+/* |
|
| 1 |
+ * This file is part of Libav. |
|
| 2 |
+ * |
|
| 3 |
+ * Libav is free software; you can redistribute it and/or |
|
| 4 |
+ * modify it under the terms of the GNU Lesser General Public |
|
| 5 |
+ * License as published by the Free Software Foundation; either |
|
| 6 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
| 7 |
+ * |
|
| 8 |
+ * Libav is distributed in the hope that it will be useful, |
|
| 9 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 10 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 11 |
+ * Lesser General Public License for more details. |
|
| 12 |
+ * |
|
| 13 |
+ * You should have received a copy of the GNU Lesser General Public |
|
| 14 |
+ * License along with Libav; if not, write to the Free Software |
|
| 15 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
| 16 |
+ */ |
|
| 17 |
+ |
|
| 18 |
+#include "config.h" |
|
| 19 |
+ |
|
| 20 |
+#include "float_dsp.h" |
|
| 21 |
+ |
|
| 22 |
/* Portable C implementation of AVFloatDSPContext.vector_fmul:
 * dst[i] = src0[i] * src1[i] for i in [0, len).
 * Each element is read before the same index is written, so dst may be the
 * same array as src0 or src1 (the AAC decoder calls it in place). */
+static void vector_fmul_c(float *dst, const float *src0, const float *src1, |
|
| 23 |
+ int len) |
|
| 24 |
+{
|
|
| 25 |
+ int i; |
|
| 26 |
+ for (i = 0; i < len; i++) |
|
| 27 |
+ dst[i] = src0[i] * src1[i]; |
|
| 28 |
+} |
|
| 29 |
+ |
|
| 30 |
/* Fill *fdsp: start from the C implementation, then let at most one
 * architecture-specific initialiser (selected at build time by the ARCH_*
 * switches) override entries.  bit_exact is forwarded only to the PowerPC
 * initialiser; the ARM and x86 ones do not receive it. */
+void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact) |
|
| 31 |
+{
|
|
| 32 |
+ fdsp->vector_fmul = vector_fmul_c; |
|
| 33 |
+ |
|
| 34 |
+#if ARCH_ARM |
|
| 35 |
+ ff_float_dsp_init_arm(fdsp); |
|
| 36 |
+#elif ARCH_PPC |
|
| 37 |
+ ff_float_dsp_init_ppc(fdsp, bit_exact); |
|
| 38 |
+#elif ARCH_X86 |
|
| 39 |
+ ff_float_dsp_init_x86(fdsp); |
|
| 40 |
+#endif |
|
| 41 |
+} |
| 0 | 42 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,53 @@ |
| 0 |
+/* |
|
| 1 |
+ * This file is part of FFmpeg. |
|
| 2 |
+ * |
|
| 3 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
| 4 |
+ * modify it under the terms of the GNU Lesser General Public |
|
| 5 |
+ * License as published by the Free Software Foundation; either |
|
| 6 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
| 7 |
+ * |
|
| 8 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
| 9 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 10 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 11 |
+ * Lesser General Public License for more details. |
|
| 12 |
+ * |
|
| 13 |
+ * You should have received a copy of the GNU Lesser General Public |
|
| 14 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
| 15 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
| 16 |
+ */ |
|
| 17 |
+ |
|
| 18 |
+#ifndef AVUTIL_FLOAT_DSP_H |
|
| 19 |
+#define AVUTIL_FLOAT_DSP_H |
|
| 20 |
+ |
|
| 21 |
/* Table of float vector routines.  avpriv_float_dsp_init() stores the C
 * implementation here and architecture-specific initialisers may replace it
 * with an optimised one; callers invoke the routines through the pointers. */
+typedef struct AVFloatDSPContext {
|
|
| 22 |
+ /** |
|
| 23 |
+ * Calculate the product of two vectors of floats and store the result in |
|
| 24 |
+ * a vector of floats (element-wise: dst[i] = src0[i] * src1[i]). |
|
| 25 |
+ * |
|
| 26 |
+ * @param dst output vector |
|
| 27 |
+ * constraints: 32-byte aligned |
|
| 28 |
+ * @param src0 first input vector |
|
| 29 |
+ * constraints: 32-byte aligned |
|
| 30 |
+ * @param src1 second input vector |
|
| 31 |
+ * constraints: 32-byte aligned |
|
| 32 |
+ * @param len number of elements in the input |
|
| 33 |
+ * constraints: multiple of 16 |
|
| 34 |
+ */ |
|
| 35 |
+ void (*vector_fmul)(float *dst, const float *src0, const float *src1, |
|
| 36 |
+ int len); |
|
| 37 |
+} AVFloatDSPContext; |
|
| 38 |
+ |
|
| 39 |
+/** |
|
| 40 |
+ * Initialize a float DSP context. |
|
| 41 |
+ * |
|
| 42 |
+ * @param fdsp float DSP context |
|
| 43 |
+ * @param strict setting to non-zero avoids using functions which may not be IEEE-754 compliant |
 *               (the definitions of avpriv_float_dsp_init() and
 *               ff_float_dsp_init_ppc() name this parameter bit_exact)
|
| 44 |
+ */ |
|
| 45 |
+void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int strict); |
|
| 46 |
+ |
|
| 47 |
+ |
|
| 48 |
/* Architecture-specific hooks called by avpriv_float_dsp_init(); each may
 * replace function pointers in *fdsp with optimised implementations. */
+void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp); |
|
| 49 |
+void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int strict); |
|
| 50 |
+void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp); |
|
| 51 |
+ |
|
| 52 |
+#endif /* AVUTIL_FLOAT_DSP_H */ |
| 2 | 5 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,38 @@ |
| 0 |
+/* |
|
| 1 |
+ * Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org> |
|
| 2 |
+ * |
|
| 3 |
+ * This file is part of FFmpeg. |
|
| 4 |
+ * |
|
| 5 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
| 6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
| 7 |
+ * License as published by the Free Software Foundation; either |
|
| 8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
| 9 |
+ * |
|
| 10 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
| 11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 13 |
+ * Lesser General Public License for more details. |
|
| 14 |
+ * |
|
| 15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
| 16 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
| 17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
| 18 |
+ */ |
|
| 19 |
+ |
|
| 20 |
+#include "util_altivec.h" |
|
| 21 |
+#include "float_dsp_altivec.h" |
|
| 22 |
+ |
|
| 23 |
/* AltiVec version of vector_fmul: dst[i] = src0[i] * src1[i].
 * Each iteration handles 8 floats as two 4-float vectors (byte offsets 0 and
 * 16 from element i) and performs the multiply as vec_madd with a zero
 * addend.  Because the loop runs while i < len - 7, elements beyond the last
 * complete group of 8 are neither read nor written.
 * NOTE(review): vec_ld/vec_st are the aligned load/store intrinsics, so the
 * pointers are presumably required to be 16-byte aligned -- confirm against
 * the vector_fmul contract. */
+void ff_vector_fmul_altivec(float *dst, const float *src0, const float *src1, |
|
| 24 |
+ int len) |
|
| 25 |
+{
|
|
| 26 |
+ int i; |
|
| 27 |
+ vector float d0, d1, s, zero = (vector float)vec_splat_u32(0); |
|
| 28 |
+ for (i = 0; i < len - 7; i += 8) {
|
|
| 29 |
+ d0 = vec_ld( 0, src0 + i); |
|
| 30 |
+ s = vec_ld( 0, src1 + i); |
|
| 31 |
+ d1 = vec_ld(16, src0 + i); |
|
| 32 |
+ d0 = vec_madd(d0, s, zero); |
|
| 33 |
+ d1 = vec_madd(d1, vec_ld(16, src1 + i), zero); |
|
| 34 |
+ vec_st(d0, 0, dst + i); |
|
| 35 |
+ vec_st(d1, 16, dst + i); |
|
| 36 |
+ } |
|
| 37 |
+} |
| 0 | 38 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,27 @@ |
| 0 |
+/* |
|
| 1 |
+ * Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org> |
|
| 2 |
+ * |
|
| 3 |
+ * This file is part of FFmpeg. |
|
| 4 |
+ * |
|
| 5 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
| 6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
| 7 |
+ * License as published by the Free Software Foundation; either |
|
| 8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
| 9 |
+ * |
|
| 10 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
| 11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 13 |
+ * Lesser General Public License for more details. |
|
| 14 |
+ * |
|
| 15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
| 16 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
| 17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
| 18 |
+ */ |
|
| 19 |
+ |
|
| 20 |
+#ifndef AVUTIL_PPC_FLOAT_DSP_ALTIVEC_H |
|
| 21 |
+#define AVUTIL_PPC_FLOAT_DSP_ALTIVEC_H |
|
| 22 |
+ |
|
| 23 |
/* AltiVec implementation of AVFloatDSPContext.vector_fmul
 * (dst[i] = src0[i] * src1[i]); installed by ff_float_dsp_init_ppc(). */
+extern void ff_vector_fmul_altivec(float *dst, const float *src0, |
|
| 24 |
+ const float *src1, int len); |
|
| 25 |
+ |
|
| 26 |
+#endif /* AVUTIL_PPC_FLOAT_DSP_ALTIVEC_H */ |
| 0 | 27 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,36 @@ |
| 0 |
+/* |
|
| 1 |
+ * Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org> |
|
| 2 |
+ * |
|
| 3 |
+ * This file is part of FFmpeg. |
|
| 4 |
+ * |
|
| 5 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
| 6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
| 7 |
+ * License as published by the Free Software Foundation; either |
|
| 8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
| 9 |
+ * |
|
| 10 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
| 11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 13 |
+ * Lesser General Public License for more details. |
|
| 14 |
+ * |
|
| 15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
| 16 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
| 17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
| 18 |
+ */ |
|
| 19 |
+ |
|
| 20 |
+#include "config.h" |
|
| 21 |
+#include "libavutil/cpu.h" |
|
| 22 |
+#include "libavutil/float_dsp.h" |
|
| 23 |
+#include "float_dsp_altivec.h" |
|
| 24 |
+ |
|
| 25 |
/* PowerPC hook: when built with HAVE_ALTIVEC and the running CPU reports
 * AV_CPU_FLAG_ALTIVEC, replace vector_fmul with the AltiVec version;
 * otherwise *fdsp is left untouched.  bit_exact is accepted but not read by
 * this function. */
+void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int bit_exact) |
|
| 26 |
+{
|
|
| 27 |
+#if HAVE_ALTIVEC |
|
| 28 |
+ int mm_flags = av_get_cpu_flags(); |
|
| 29 |
+ |
|
| 30 |
+ if (!(mm_flags & AV_CPU_FLAG_ALTIVEC)) |
|
| 31 |
+ return; |
|
| 32 |
+ |
|
| 33 |
+ fdsp->vector_fmul = ff_vector_fmul_altivec; |
|
| 34 |
+#endif |
|
| 35 |
+} |
| 0 | 36 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,47 @@ |
| 0 |
+/* |
|
| 1 |
+ * Copyright (c) 2006 Guillaume Poirier <gpoirier@mplayerhq.hu> |
|
| 2 |
+ * |
|
| 3 |
+ * This file is part of FFmpeg. |
|
| 4 |
+ * |
|
| 5 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
| 6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
| 7 |
+ * License as published by the Free Software Foundation; either |
|
| 8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
| 9 |
+ * |
|
| 10 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
| 11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 13 |
+ * Lesser General Public License for more details. |
|
| 14 |
+ * |
|
| 15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
| 16 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
| 17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
| 18 |
+ */ |
|
| 19 |
+ |
|
| 20 |
+#ifndef AVUTIL_PPC_TYPES_ALTIVEC_H |
|
| 21 |
+#define AVUTIL_PPC_TYPES_ALTIVEC_H |
|
| 22 |
+ |
|
| 23 |
+/*********************************************************************** |
|
| 24 |
+ * Vector types |
|
| 25 |
+ **********************************************************************/ |
|
| 26 |
/* Short aliases for the AltiVec vector types; these are macros, not
 * typedefs, so they expand textually wherever they are used. */
+#define vec_u8 vector unsigned char |
|
| 27 |
+#define vec_s8 vector signed char |
|
| 28 |
+#define vec_u16 vector unsigned short |
|
| 29 |
+#define vec_s16 vector signed short |
|
| 30 |
+#define vec_u32 vector unsigned int |
|
| 31 |
+#define vec_s32 vector signed int |
|
| 32 |
+#define vec_f vector float |
|
| 33 |
+ |
|
| 34 |
+/*********************************************************************** |
|
| 35 |
+ * Null vector |
|
| 36 |
+ **********************************************************************/ |
|
| 37 |
/* LOAD_ZERO declares a constant named zerov (no trailing semicolon is part
 * of the macro).  The zero_*v macros below are plain casts of that
 * identifier, so they only compile where LOAD_ZERO, or another declaration
 * of zerov, is in scope. */
+#define LOAD_ZERO const vec_u8 zerov = vec_splat_u8( 0 ) |
|
| 38 |
+ |
|
| 39 |
+#define zero_u8v (vec_u8) zerov |
|
| 40 |
+#define zero_s8v (vec_s8) zerov |
|
| 41 |
+#define zero_u16v (vec_u16) zerov |
|
| 42 |
+#define zero_s16v (vec_s16) zerov |
|
| 43 |
+#define zero_u32v (vec_u32) zerov |
|
| 44 |
+#define zero_s32v (vec_s32) zerov |
|
| 45 |
+ |
|
| 46 |
+#endif /* AVUTIL_PPC_TYPES_ALTIVEC_H */ |
| 0 | 47 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,118 @@ |
| 0 |
+/* |
|
| 1 |
+ * This file is part of FFmpeg. |
|
| 2 |
+ * |
|
| 3 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
| 4 |
+ * modify it under the terms of the GNU Lesser General Public |
|
| 5 |
+ * License as published by the Free Software Foundation; either |
|
| 6 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
| 7 |
+ * |
|
| 8 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
| 9 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 10 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 11 |
+ * Lesser General Public License for more details. |
|
| 12 |
+ * |
|
| 13 |
+ * You should have received a copy of the GNU Lesser General Public |
|
| 14 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
| 15 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
| 16 |
+ */ |
|
| 17 |
+ |
|
| 18 |
+/** |
|
| 19 |
+ * @file |
|
| 20 |
+ * Contains misc utility macros and inline functions |
|
| 21 |
+ */ |
|
| 22 |
+ |
|
| 23 |
+#ifndef AVUTIL_PPC_UTIL_ALTIVEC_H |
|
| 24 |
+#define AVUTIL_PPC_UTIL_ALTIVEC_H |
|
| 25 |
+ |
|
| 26 |
+#include <stdint.h> |
|
| 27 |
+ |
|
| 28 |
+#include "config.h" |
|
| 29 |
+ |
|
| 30 |
+#if HAVE_ALTIVEC_H |
|
| 31 |
+#include <altivec.h> |
|
| 32 |
+#endif |
|
| 33 |
+ |
|
| 34 |
+#include "types_altivec.h" |
|
| 35 |
+ |
|
| 36 |
+// used to build register permutation vectors (vcprm) |
|
| 37 |
+// the 's' are for words in the _s_econd vector |
|
| 38 |
// each WORD_x lists the four byte indices of one 32-bit word: 0x00-0x0f
// address the first source vector, 0x10-0x1f the second
+#define WORD_0 0x00,0x01,0x02,0x03 |
|
| 39 |
+#define WORD_1 0x04,0x05,0x06,0x07 |
|
| 40 |
+#define WORD_2 0x08,0x09,0x0a,0x0b |
|
| 41 |
+#define WORD_3 0x0c,0x0d,0x0e,0x0f |
|
| 42 |
+#define WORD_s0 0x10,0x11,0x12,0x13 |
|
| 43 |
+#define WORD_s1 0x14,0x15,0x16,0x17 |
|
| 44 |
+#define WORD_s2 0x18,0x19,0x1a,0x1b |
|
| 45 |
+#define WORD_s3 0x1c,0x1d,0x1e,0x1f |
|
| 46 |
+ |
|
| 47 |
// vcprm(a,b,c,d): 16-byte literal made of WORD_a..WORD_d, where each argument
// is one of 0-3 or s0-s3
// vcii(a,b,c,d): vector float literal made of FLOAT_a..FLOAT_d, where each
// argument is n (-1.) or p (1.)
+#define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d}
|
|
| 48 |
+#define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d}
|
|
| 49 |
+ |
|
| 50 |
+// vcprmle is used to keep the same index as in the SSE version. |
|
| 51 |
+// it's the same as vcprm, with the argument order reversed |
|
| 52 |
+// ('le' is Little Endian)
|
|
| 53 |
+#define vcprmle(a,b,c,d) vcprm(d,c,b,a) |
|
| 54 |
+ |
|
| 55 |
+// used to build inverse/identity vectors (vcii) |
|
| 56 |
+// n is _n_egative, p is _p_ositive |
|
| 57 |
+#define FLOAT_n -1. |
|
| 58 |
+#define FLOAT_p 1. |
|
| 60 |
+ |
|
| 61 |
+// Transpose 8x8 matrix of 16-bit elements (in-place) |
|
| 62 |
// a..h are the eight row vectors; all of them are read and then assigned, so
// they must be modifiable lvalues.  The work is done in three rounds of
// vec_mergeh/vec_mergel through temporaries of type vector signed short
// (A1..H1, A2..H2) declared inside the do { } while (0) body.
+#define TRANSPOSE8(a,b,c,d,e,f,g,h) \ |
|
| 63 |
+do { \
|
|
| 64 |
+ vector signed short A1, B1, C1, D1, E1, F1, G1, H1; \ |
|
| 65 |
+ vector signed short A2, B2, C2, D2, E2, F2, G2, H2; \ |
|
| 66 |
+ \ |
|
| 67 |
+ A1 = vec_mergeh (a, e); \ |
|
| 68 |
+ B1 = vec_mergel (a, e); \ |
|
| 69 |
+ C1 = vec_mergeh (b, f); \ |
|
| 70 |
+ D1 = vec_mergel (b, f); \ |
|
| 71 |
+ E1 = vec_mergeh (c, g); \ |
|
| 72 |
+ F1 = vec_mergel (c, g); \ |
|
| 73 |
+ G1 = vec_mergeh (d, h); \ |
|
| 74 |
+ H1 = vec_mergel (d, h); \ |
|
| 75 |
+ \ |
|
| 76 |
+ A2 = vec_mergeh (A1, E1); \ |
|
| 77 |
+ B2 = vec_mergel (A1, E1); \ |
|
| 78 |
+ C2 = vec_mergeh (B1, F1); \ |
|
| 79 |
+ D2 = vec_mergel (B1, F1); \ |
|
| 80 |
+ E2 = vec_mergeh (C1, G1); \ |
|
| 81 |
+ F2 = vec_mergel (C1, G1); \ |
|
| 82 |
+ G2 = vec_mergeh (D1, H1); \ |
|
| 83 |
+ H2 = vec_mergel (D1, H1); \ |
|
| 84 |
+ \ |
|
| 85 |
+ a = vec_mergeh (A2, E2); \ |
|
| 86 |
+ b = vec_mergel (A2, E2); \ |
|
| 87 |
+ c = vec_mergeh (B2, F2); \ |
|
| 88 |
+ d = vec_mergel (B2, F2); \ |
|
| 89 |
+ e = vec_mergeh (C2, G2); \ |
|
| 90 |
+ f = vec_mergel (C2, G2); \ |
|
| 91 |
+ g = vec_mergeh (D2, H2); \ |
|
| 92 |
+ h = vec_mergel (D2, H2); \ |
|
| 93 |
+} while (0) |
|
| 94 |
+ |
|
| 95 |
+ |
|
| 96 |
+/** @brief loads unaligned vector @a *src with offset @a offset |
|
| 97 |
+ and returns it */ |
|
| 98 |
/* Two vec_ld loads, at offset and offset+15, fetch the data; vec_lvsl
 * produces the permute mask for the same address, and vec_perm combines the
 * two loads under that mask.
 * NOTE(review): this relies on vec_ld ignoring the low four address bits
 * (AltiVec semantics), so the two loads are the aligned blocks that straddle
 * src + offset -- confirm against the AltiVec PIM. */
+static inline vector unsigned char unaligned_load(int offset, uint8_t *src) |
|
| 99 |
+{
|
|
| 100 |
+ register vector unsigned char first = vec_ld(offset, src); |
|
| 101 |
+ register vector unsigned char second = vec_ld(offset+15, src); |
|
| 102 |
+ register vector unsigned char mask = vec_lvsl(offset, src); |
|
| 103 |
+ return vec_perm(first, second, mask); |
|
| 104 |
+} |
|
| 105 |
+ |
|
| 106 |
+/** |
|
| 107 |
+ * Load a vector from an address whose misalignment is already known. |
|
| 108 |
+ * @param perm_vec the align permute vector to combine the two loads from lvsl |
|
| 109 |
+ */ |
|
| 110 |
/* Same two-load-and-permute sequence as unaligned_load(), except that the
 * permute mask is supplied by the caller instead of being computed with
 * vec_lvsl here. */
+static inline vec_u8 load_with_perm_vec(int offset, uint8_t *src, vec_u8 perm_vec) |
|
| 111 |
+{
|
|
| 112 |
+ vec_u8 a = vec_ld(offset, src); |
|
| 113 |
+ vec_u8 b = vec_ld(offset+15, src); |
|
| 114 |
+ return vec_perm(a, b, perm_vec); |
|
| 115 |
+} |
|
| 116 |
+ |
|
| 117 |
+#endif /* AVUTIL_PPC_UTIL_ALTIVEC_H */ |
| 2 | 5 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,55 @@ |
| 0 |
+;***************************************************************************** |
|
| 1 |
+;* x86-optimized Float DSP functions |
|
| 2 |
+;* |
|
| 3 |
+;* This file is part of Libav. |
|
| 4 |
+;* |
|
| 5 |
+;* Libav is free software; you can redistribute it and/or |
|
| 6 |
+;* modify it under the terms of the GNU Lesser General Public |
|
| 7 |
+;* License as published by the Free Software Foundation; either |
|
| 8 |
+;* version 2.1 of the License, or (at your option) any later version. |
|
| 9 |
+;* |
|
| 10 |
+;* Libav is distributed in the hope that it will be useful, |
|
| 11 |
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 12 |
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 13 |
+;* Lesser General Public License for more details. |
|
| 14 |
+;* |
|
| 15 |
+;* You should have received a copy of the GNU Lesser General Public |
|
| 16 |
+;* License along with Libav; if not, write to the Free Software |
|
| 17 |
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
| 18 |
+;****************************************************************************** |
|
| 19 |
+ |
|
| 20 |
+%include "x86inc.asm" |
|
| 21 |
+ |
|
| 22 |
+SECTION .text |
|
| 23 |
+ |
|
| 24 |
+;----------------------------------------------------------------------------- |
|
| 25 |
+; void vector_fmul(float *dst, const float *src0, const float *src1, int len) |
|
| 26 |
+;----------------------------------------------------------------------------- |
|
| 27 |
; One macro body, instantiated once per instruction set at the bottom.
; Each loop pass multiplies two vector registers (2*mmsize bytes) of src0 by
; src1 and writes them to dst, walking from the end of the arrays to the
; start.  len*4 therefore has to be a multiple of 2*mmsize: len a multiple
; of 8 for the XMM build and of 16 for the YMM build.
; NOTE(review): mova is presumably the aligned move from x86inc, and cglobal
; presumably emits the ff_vector_fmul_sse / ff_vector_fmul_avx symbols that
; the C init code declares -- confirm in x86inc.asm.
+%macro VECTOR_FMUL 0 |
|
| 28 |
+cglobal vector_fmul, 4,4,2, dst, src0, src1, len |
|
| 29 |
; lenq = byte offset of the last 2*mmsize-byte chunk (len floats * 4 bytes)
+ lea lenq, [lend*4 - 2*mmsize] |
|
| 30 |
+ALIGN 16 |
|
| 31 |
+.loop |
|
| 32 |
+ mova m0, [src0q + lenq] |
|
| 33 |
+ mova m1, [src0q + lenq + mmsize] |
|
| 34 |
+ mulps m0, m0, [src1q + lenq] |
|
| 35 |
+ mulps m1, m1, [src1q + lenq + mmsize] |
|
| 36 |
+ mova [dstq + lenq], m0 |
|
| 37 |
+ mova [dstq + lenq + mmsize], m1 |
|
| 38 |
+ |
|
| 39 |
; step back one chunk; the pass at offset 0 is the last one
+ sub lenq, 2*mmsize |
|
| 40 |
+ jge .loop |
|
| 41 |
; YMM build only: execute vzeroupper before returning
+%if mmsize == 32 |
|
| 42 |
+ vzeroupper |
|
| 43 |
+ RET |
|
| 44 |
+%else |
|
| 45 |
+ REP_RET |
|
| 46 |
+%endif |
|
| 47 |
+%endmacro |
|
| 48 |
+ |
|
| 49 |
; SSE build is always assembled; the AVX build only when HAVE_AVX is set
+INIT_XMM sse |
|
| 50 |
+VECTOR_FMUL |
|
| 51 |
+%if HAVE_AVX |
|
| 52 |
+INIT_YMM avx |
|
| 53 |
+VECTOR_FMUL |
|
| 54 |
+%endif |
| 0 | 55 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,41 @@ |
| 0 |
+/* |
|
| 1 |
+ * This file is part of FFmpeg. |
|
| 2 |
+ * |
|
| 3 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
| 4 |
+ * modify it under the terms of the GNU Lesser General Public |
|
| 5 |
+ * License as published by the Free Software Foundation; either |
|
| 6 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
| 7 |
+ * |
|
| 8 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
| 9 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 10 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 11 |
+ * Lesser General Public License for more details. |
|
| 12 |
+ * |
|
| 13 |
+ * You should have received a copy of the GNU Lesser General Public |
|
| 14 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
| 15 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
| 16 |
+ */ |
|
| 17 |
+ |
|
| 18 |
+#include "config.h" |
|
| 19 |
+ |
|
| 20 |
+#include "libavutil/cpu.h" |
|
| 21 |
+#include "libavutil/float_dsp.h" |
|
| 22 |
+ |
|
| 23 |
/* SSE and AVX implementations of AVFloatDSPContext.vector_fmul; only
 * declared here. */
+extern void ff_vector_fmul_sse(float *dst, const float *src0, const float *src1, |
|
| 24 |
+ int len); |
|
| 25 |
+extern void ff_vector_fmul_avx(float *dst, const float *src0, const float *src1, |
|
| 26 |
+ int len); |
|
| 27 |
+ |
|
| 28 |
/* x86 hook, active only when built with HAVE_YASM.  Each test pairs the
 * runtime CPU flag with the matching build-time HAVE_* switch.  The SSE
 * routine is installed first and the AVX one afterwards, so AVX is what
 * remains when both conditions hold; with neither, *fdsp is unchanged. */
+void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp) |
|
| 29 |
+{
|
|
| 30 |
+#if HAVE_YASM |
|
| 31 |
+ int mm_flags = av_get_cpu_flags(); |
|
| 32 |
+ |
|
| 33 |
+ if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) {
|
|
| 34 |
+ fdsp->vector_fmul = ff_vector_fmul_sse; |
|
| 35 |
+ } |
|
| 36 |
+ if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) {
|
|
| 37 |
+ fdsp->vector_fmul = ff_vector_fmul_avx; |
|
| 38 |
+ } |
|
| 39 |
+#endif |
|
| 40 |
+} |