Originally committed as revision 19864 to svn://svn.ffmpeg.org/ffmpeg/trunk
| ... | ... |
@@ -465,6 +465,7 @@ OBJS-$(HAVE_MMX) += x86/cpuid.o \ |
| 465 | 465 |
x86/dnxhd_mmx.o \ |
| 466 | 466 |
x86/dsputil_mmx.o \ |
| 467 | 467 |
x86/fdct_mmx.o \ |
| 468 |
+ x86/fft.o \ |
|
| 468 | 469 |
x86/idct_mmx_xvid.o \ |
| 469 | 470 |
x86/idct_sse2_xvid.o \ |
| 470 | 471 |
x86/motion_est_mmx.o \ |
| ... | ... |
@@ -480,6 +481,7 @@ OBJS-$(ARCH_ALPHA) += alpha/dsputil_alpha.o \ |
| 480 | 480 |
|
| 481 | 481 |
OBJS-$(ARCH_ARM) += arm/dsputil_arm.o \ |
| 482 | 482 |
arm/dsputil_arm_s.o \ |
| 483 |
+ arm/fft_init_arm.o \ |
|
| 483 | 484 |
arm/jrevdct_arm.o \ |
| 484 | 485 |
arm/mpegvideo_arm.o \ |
| 485 | 486 |
arm/simple_idct_arm.o \ |
| 486 | 487 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,39 @@ |
| 0 |
+/* |
|
| 1 |
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com> |
|
| 2 |
+ * |
|
| 3 |
+ * This file is part of FFmpeg. |
|
| 4 |
+ * |
|
| 5 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
| 6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
| 7 |
+ * License as published by the Free Software Foundation; either |
|
| 8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
| 9 |
+ * |
|
| 10 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
| 11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 13 |
+ * Lesser General Public License for more details. |
|
| 14 |
+ * |
|
| 15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
| 16 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
| 17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
| 18 |
+ */ |
|
| 19 |
+ |
|
| 20 |
+#include "libavcodec/dsputil.h" |
|
| 21 |
+ |
|
| 22 |
+void ff_fft_permute_neon(FFTContext *s, FFTComplex *z); |
|
| 23 |
+void ff_fft_calc_neon(FFTContext *s, FFTComplex *z); |
|
| 24 |
+ |
|
| 25 |
+void ff_imdct_calc_neon(MDCTContext *s, FFTSample *output, const FFTSample *input); |
|
| 26 |
+void ff_imdct_half_neon(MDCTContext *s, FFTSample *output, const FFTSample *input); |
|
| 27 |
+void ff_mdct_calc_neon(MDCTContext *s, FFTSample *output, const FFTSample *input); |
|
| 28 |
+ |
|
| 29 |
+av_cold void ff_fft_init_arm(FFTContext *s) |
|
| 30 |
+{ /* ARM-specific FFT setup: ff_fft_init() calls this under "if (ARCH_ARM)" after it has set split_radix = 1. */
|
|
| 31 |
+ if (HAVE_NEON) { /* build-configuration macro; the code this replaces tested it with "#elif HAVE_NEON" */
|
|
| 32 |
+ s->fft_permute = ff_fft_permute_neon; |
|
| 33 |
+ s->fft_calc = ff_fft_calc_neon; |
|
| 34 |
+ s->imdct_calc = ff_imdct_calc_neon; |
|
| 35 |
+ s->imdct_half = ff_imdct_half_neon; |
|
| 36 |
+ s->mdct_calc = ff_mdct_calc_neon; /* split_radix is left untouched in this function */ |
|
| 37 |
+ } |
|
| 38 |
+} |
| ... | ... |
@@ -683,6 +683,7 @@ typedef struct FFTContext {
|
| 683 | 683 |
void (*imdct_calc)(struct MDCTContext *s, FFTSample *output, const FFTSample *input); |
| 684 | 684 |
void (*imdct_half)(struct MDCTContext *s, FFTSample *output, const FFTSample *input); |
| 685 | 685 |
void (*mdct_calc)(struct MDCTContext *s, FFTSample *output, const FFTSample *input); |
| 686 |
+ int split_radix; |
|
| 686 | 687 |
} FFTContext; |
| 687 | 688 |
|
| 688 | 689 |
extern FFTSample* const ff_cos_tabs[13]; |
| ... | ... |
@@ -694,14 +695,11 @@ extern FFTSample* const ff_cos_tabs[13]; |
| 694 | 694 |
*/ |
| 695 | 695 |
int ff_fft_init(FFTContext *s, int nbits, int inverse); |
| 696 | 696 |
void ff_fft_permute_c(FFTContext *s, FFTComplex *z); |
| 697 |
-void ff_fft_permute_sse(FFTContext *s, FFTComplex *z); |
|
| 698 |
-void ff_fft_permute_neon(FFTContext *s, FFTComplex *z); |
|
| 699 | 697 |
void ff_fft_calc_c(FFTContext *s, FFTComplex *z); |
| 700 |
-void ff_fft_calc_sse(FFTContext *s, FFTComplex *z); |
|
| 701 |
-void ff_fft_calc_3dn(FFTContext *s, FFTComplex *z); |
|
| 702 |
-void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z); |
|
| 703 |
-void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z); |
|
| 704 |
-void ff_fft_calc_neon(FFTContext *s, FFTComplex *z); |
|
| 698 |
+ |
|
| 699 |
+void ff_fft_init_altivec(FFTContext *s); |
|
| 700 |
+void ff_fft_init_mmx(FFTContext *s); |
|
| 701 |
+void ff_fft_init_arm(FFTContext *s); /* ARM init hook: must match the name that is defined (av_cold void ff_fft_init_arm) and called from ff_fft_init() */ |
|
| 705 | 702 |
|
| 706 | 703 |
/** |
| 707 | 704 |
* Do the permutation needed BEFORE calling ff_fft_calc(). |
| ... | ... |
@@ -774,15 +772,6 @@ int ff_mdct_init(MDCTContext *s, int nbits, int inverse, double scale); |
| 774 | 774 |
void ff_imdct_calc_c(MDCTContext *s, FFTSample *output, const FFTSample *input); |
| 775 | 775 |
void ff_imdct_half_c(MDCTContext *s, FFTSample *output, const FFTSample *input); |
| 776 | 776 |
void ff_mdct_calc_c(MDCTContext *s, FFTSample *output, const FFTSample *input); |
| 777 |
-void ff_imdct_calc_3dn(MDCTContext *s, FFTSample *output, const FFTSample *input); |
|
| 778 |
-void ff_imdct_half_3dn(MDCTContext *s, FFTSample *output, const FFTSample *input); |
|
| 779 |
-void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input); |
|
| 780 |
-void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input); |
|
| 781 |
-void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output, const FFTSample *input); |
|
| 782 |
-void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input); |
|
| 783 |
-void ff_imdct_calc_neon(MDCTContext *s, FFTSample *output, const FFTSample *input); |
|
| 784 |
-void ff_imdct_half_neon(MDCTContext *s, FFTSample *output, const FFTSample *input); |
|
| 785 |
-void ff_mdct_calc_neon(MDCTContext *s, FFTSample *output, const FFTSample *input); |
|
| 786 | 777 |
void ff_mdct_end(MDCTContext *s); |
| 787 | 778 |
|
| 788 | 779 |
/* Real Discrete Fourier Transform */ |
| ... | ... |
@@ -62,7 +62,6 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse) |
| 62 | 62 |
{
|
| 63 | 63 |
int i, j, m, n; |
| 64 | 64 |
float alpha, c1, s1, s2; |
| 65 |
- int split_radix = 1; |
|
| 66 | 65 |
int av_unused has_vectors; |
| 67 | 66 |
|
| 68 | 67 |
if (nbits < 2 || nbits > 16) |
| ... | ... |
@@ -87,41 +86,13 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse) |
| 87 | 87 |
s->imdct_half = ff_imdct_half_c; |
| 88 | 88 |
s->mdct_calc = ff_mdct_calc_c; |
| 89 | 89 |
s->exptab1 = NULL; |
| 90 |
+ s->split_radix = 1; |
|
| 90 | 91 |
|
| 91 |
-#if HAVE_MMX && HAVE_YASM |
|
| 92 |
- has_vectors = mm_support(); |
|
| 93 |
- if (has_vectors & FF_MM_SSE && HAVE_SSE) {
|
|
| 94 |
- /* SSE for P3/P4/K8 */ |
|
| 95 |
- s->imdct_calc = ff_imdct_calc_sse; |
|
| 96 |
- s->imdct_half = ff_imdct_half_sse; |
|
| 97 |
- s->fft_permute = ff_fft_permute_sse; |
|
| 98 |
- s->fft_calc = ff_fft_calc_sse; |
|
| 99 |
- } else if (has_vectors & FF_MM_3DNOWEXT && HAVE_AMD3DNOWEXT) {
|
|
| 100 |
- /* 3DNowEx for K7 */ |
|
| 101 |
- s->imdct_calc = ff_imdct_calc_3dn2; |
|
| 102 |
- s->imdct_half = ff_imdct_half_3dn2; |
|
| 103 |
- s->fft_calc = ff_fft_calc_3dn2; |
|
| 104 |
- } else if (has_vectors & FF_MM_3DNOW && HAVE_AMD3DNOW) {
|
|
| 105 |
- /* 3DNow! for K6-2/3 */ |
|
| 106 |
- s->imdct_calc = ff_imdct_calc_3dn; |
|
| 107 |
- s->imdct_half = ff_imdct_half_3dn; |
|
| 108 |
- s->fft_calc = ff_fft_calc_3dn; |
|
| 109 |
- } |
|
| 110 |
-#elif HAVE_ALTIVEC |
|
| 111 |
- has_vectors = mm_support(); |
|
| 112 |
- if (has_vectors & FF_MM_ALTIVEC) {
|
|
| 113 |
- s->fft_calc = ff_fft_calc_altivec; |
|
| 114 |
- split_radix = 0; |
|
| 115 |
- } |
|
| 116 |
-#elif HAVE_NEON |
|
| 117 |
- s->fft_permute = ff_fft_permute_neon; |
|
| 118 |
- s->fft_calc = ff_fft_calc_neon; |
|
| 119 |
- s->imdct_calc = ff_imdct_calc_neon; |
|
| 120 |
- s->imdct_half = ff_imdct_half_neon; |
|
| 121 |
- s->mdct_calc = ff_mdct_calc_neon; |
|
| 122 |
-#endif |
|
| 92 |
+ if (ARCH_ARM) ff_fft_init_arm(s); |
|
| 93 |
+ if (HAVE_ALTIVEC) ff_fft_init_altivec(s); |
|
| 94 |
+ if (HAVE_MMX) ff_fft_init_mmx(s); |
|
| 123 | 95 |
|
| 124 |
- if (split_radix) {
|
|
| 96 |
+ if (s->split_radix) {
|
|
| 125 | 97 |
for(j=4; j<=nbits; j++) {
|
| 126 | 98 |
int m = 1<<j; |
| 127 | 99 |
double freq = 2*M_PI/m; |
| ... | ... |
@@ -133,3 +133,9 @@ POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6); |
| 133 | 133 |
|
| 134 | 134 |
POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6); |
| 135 | 135 |
} |
| 136 |
+ |
|
| 137 |
+av_cold void ff_fft_init_altivec(FFTContext *s) |
|
| 138 |
+{ /* AltiVec FFT setup: ff_fft_init() calls this under "if (HAVE_ALTIVEC)". NOTE(review): the code this replaces also required (mm_support() & FF_MM_ALTIVEC) before installing the AltiVec routine; no such runtime check is made here - confirm that is intended. */
|
|
| 139 |
+ s->fft_calc = ff_fft_calc_altivec; |
|
| 140 |
+ s->split_radix = 0; /* makes ff_fft_init() skip its "if (s->split_radix)" setup branch */ |
|
| 141 |
+} |
| 136 | 142 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,44 @@ |
| 0 |
+/* |
|
| 1 |
+ * This file is part of FFmpeg. |
|
| 2 |
+ * |
|
| 3 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
| 4 |
+ * modify it under the terms of the GNU Lesser General Public |
|
| 5 |
+ * License as published by the Free Software Foundation; either |
|
| 6 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
| 7 |
+ * |
|
| 8 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
| 9 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 10 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 11 |
+ * Lesser General Public License for more details. |
|
| 12 |
+ * |
|
| 13 |
+ * You should have received a copy of the GNU Lesser General Public |
|
| 14 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
| 15 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
| 16 |
+ */ |
|
| 17 |
+ |
|
| 18 |
+#include "libavcodec/dsputil.h" |
|
| 19 |
+#include "fft.h" |
|
| 20 |
+ |
|
| 21 |
+av_cold void ff_fft_init_mmx(FFTContext *s) |
|
| 22 |
+{ /* x86 FFT setup: ff_fft_init() calls this under "if (HAVE_MMX)". Installs the first matching SIMD variant (SSE, then 3DNowExt, then 3DNow); mdct_calc is never changed here. */
|
|
| 23 |
+#if HAVE_YASM /* without HAVE_YASM the whole body is compiled out and the function does nothing */ |
|
| 24 |
+ int has_vectors = mm_support(); /* result is bit-tested against the FF_MM_* flags below */ |
|
| 25 |
+ if (has_vectors & FF_MM_SSE && HAVE_SSE) { /* parses as (has_vectors & FF_MM_SSE) && HAVE_SSE */
|
|
| 26 |
+ /* SSE for P3/P4/K8 */ |
|
| 27 |
+ s->imdct_calc = ff_imdct_calc_sse; |
|
| 28 |
+ s->imdct_half = ff_imdct_half_sse; |
|
| 29 |
+ s->fft_permute = ff_fft_permute_sse; /* only the SSE branch replaces fft_permute */ |
|
| 30 |
+ s->fft_calc = ff_fft_calc_sse; |
|
| 31 |
+ } else if (has_vectors & FF_MM_3DNOWEXT && HAVE_AMD3DNOWEXT) {
|
|
| 32 |
+ /* 3DNowEx for K7 */ |
|
| 33 |
+ s->imdct_calc = ff_imdct_calc_3dn2; |
|
| 34 |
+ s->imdct_half = ff_imdct_half_3dn2; |
|
| 35 |
+ s->fft_calc = ff_fft_calc_3dn2; |
|
| 36 |
+ } else if (has_vectors & FF_MM_3DNOW && HAVE_AMD3DNOW) {
|
|
| 37 |
+ /* 3DNow! for K6-2/3 */ |
|
| 38 |
+ s->imdct_calc = ff_imdct_calc_3dn; |
|
| 39 |
+ s->imdct_half = ff_imdct_half_3dn; |
|
| 40 |
+ s->fft_calc = ff_fft_calc_3dn; |
|
| 41 |
+ } |
|
| 42 |
+#endif /* HAVE_YASM */ |
|
| 43 |
+} |
| 0 | 44 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,36 @@ |
| 0 |
+/* |
|
| 1 |
+ * This file is part of FFmpeg. |
|
| 2 |
+ * |
|
| 3 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
| 4 |
+ * modify it under the terms of the GNU Lesser General Public |
|
| 5 |
+ * License as published by the Free Software Foundation; either |
|
| 6 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
| 7 |
+ * |
|
| 8 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
| 9 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 10 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 11 |
+ * Lesser General Public License for more details. |
|
| 12 |
+ * |
|
| 13 |
+ * You should have received a copy of the GNU Lesser General Public |
|
| 14 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
| 15 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
| 16 |
+ */ |
|
| 17 |
+ |
|
| 18 |
+#ifndef AVCODEC_X86_FFT_H |
|
| 19 |
+#define AVCODEC_X86_FFT_H |
|
| 20 |
+ /* Prototypes for the x86 FFT/IMDCT routines that ff_fft_init_mmx() installs into an FFTContext. */ |
|
| 21 |
+#include "libavcodec/dsputil.h" |
|
| 22 |
+ /* FFT permute/calc routines (SSE, 3DNow!, 3DNowExt) */ |
|
| 23 |
+void ff_fft_permute_sse(FFTContext *s, FFTComplex *z); |
|
| 24 |
+void ff_fft_calc_sse(FFTContext *s, FFTComplex *z); |
|
| 25 |
+void ff_fft_calc_3dn(FFTContext *s, FFTComplex *z); |
|
| 26 |
+void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z); |
|
| 27 |
+ /* IMDCT routines (imdct_calc / imdct_half) for the same instruction sets */ |
|
| 28 |
+void ff_imdct_calc_3dn(MDCTContext *s, FFTSample *output, const FFTSample *input); |
|
| 29 |
+void ff_imdct_half_3dn(MDCTContext *s, FFTSample *output, const FFTSample *input); |
|
| 30 |
+void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input); |
|
| 31 |
+void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input); |
|
| 32 |
+void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output, const FFTSample *input); |
|
| 33 |
+void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input); |
|
| 34 |
+ |
|
| 35 |
+#endif /* AVCODEC_X86_FFT_H */ |