GitList

Browse code

dct/fft: Give consistent names to fixed/float template files

Diego Biurrun authored on 2013/11/20 05:17:53
Showing 12 changed files

libavcodec/dct32.c index 272e0db..0000000
libavcodec/dct32_fixed.c index 7eb9dc1..64efe8b 100644
libavcodec/dct32_float.c index 727ec3c..ef37ce9 100644
libavcodec/dct32_template.c index 0000000..272e0db
libavcodec/fft.c index 0b8140a..0000000
libavcodec/fft_fixed.c index b28091d..91dc69d 100644
libavcodec/fft_float.c index 24c9fdb..213da9f 100644
libavcodec/fft_template.c index 0000000..0b8140a
libavcodec/mdct.c index 6f64534..0000000
libavcodec/mdct_fixed.c index 94527f9..15dfcd4 100644
libavcodec/mdct_float.c index e4f5549..f8955f6 100644
libavcodec/mdct_template.c index 0000000..6f64534

libavcodec/dct32.c

                     deleted file mode 100644
@@ -1,276 +0,0 @@
                     -/*
                     - * Template for the Discrete Cosine Transform for 32 samples
                     - * Copyright (c) 2001, 2002 Fabrice Bellard
                     - *
                     - * This file is part of Libav.
                     - *
                     - * Libav is free software; you can redistribute it and/or
                     - * modify it under the terms of the GNU Lesser General Public
                     - * License as published by the Free Software Foundation; either
                     - * version 2.1 of the License, or (at your option) any later version.
                     - *
                     - * Libav is distributed in the hope that it will be useful,
                     - * but WITHOUT ANY WARRANTY; without even the implied warranty of
                     - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     - * Lesser General Public License for more details.
                     - *
                     - * You should have received a copy of the GNU Lesser General Public
                     - * License along with Libav; if not, write to the Free Software
                     - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                     - */
+                    -
                     -#include "dct32.h"
                     -#include "mathops.h"
+                    -
                     -#if DCT32_FLOAT
                     -#   define dct32 ff_dct32_float
                     -#   define FIXHR(x)       ((float)(x))
                     -#   define MULH3(x, y, s) ((s)*(y)*(x))
                     -#   define INTFLOAT float
                     -#else
                     -#   define dct32 ff_dct32_fixed
                     -#   define FIXHR(a)       ((int)((a) * (1LL<<32) + 0.5))
                     -#   define MULH3(x, y, s) MULH((s)*(x), y)
                     -#   define INTFLOAT int
                     -#endif
+                    -
+                    -
                     -/* tab[i][j] = 1.0 / (2.0 * cos(pi*(2*k+1) / 2^(6 - j))) */
+                    -
                     -/* cos(i*pi/64) */
+                    -
                     -#define COS0_0  FIXHR(0.50060299823519630134/2)
                     -#define COS0_1  FIXHR(0.50547095989754365998/2)
                     -#define COS0_2  FIXHR(0.51544730992262454697/2)
                     -#define COS0_3  FIXHR(0.53104259108978417447/2)
                     -#define COS0_4  FIXHR(0.55310389603444452782/2)
                     -#define COS0_5  FIXHR(0.58293496820613387367/2)
                     -#define COS0_6  FIXHR(0.62250412303566481615/2)
                     -#define COS0_7  FIXHR(0.67480834145500574602/2)
                     -#define COS0_8  FIXHR(0.74453627100229844977/2)
                     -#define COS0_9  FIXHR(0.83934964541552703873/2)
                     -#define COS0_10 FIXHR(0.97256823786196069369/2)
                     -#define COS0_11 FIXHR(1.16943993343288495515/4)
                     -#define COS0_12 FIXHR(1.48416461631416627724/4)
                     -#define COS0_13 FIXHR(2.05778100995341155085/8)
                     -#define COS0_14 FIXHR(3.40760841846871878570/8)
                     -#define COS0_15 FIXHR(10.19000812354805681150/32)
+                    -
                     -#define COS1_0 FIXHR(0.50241928618815570551/2)
                     -#define COS1_1 FIXHR(0.52249861493968888062/2)
                     -#define COS1_2 FIXHR(0.56694403481635770368/2)
                     -#define COS1_3 FIXHR(0.64682178335999012954/2)
                     -#define COS1_4 FIXHR(0.78815462345125022473/2)
                     -#define COS1_5 FIXHR(1.06067768599034747134/4)
                     -#define COS1_6 FIXHR(1.72244709823833392782/4)
                     -#define COS1_7 FIXHR(5.10114861868916385802/16)
+                    -
                     -#define COS2_0 FIXHR(0.50979557910415916894/2)
                     -#define COS2_1 FIXHR(0.60134488693504528054/2)
                     -#define COS2_2 FIXHR(0.89997622313641570463/2)
                     -#define COS2_3 FIXHR(2.56291544774150617881/8)
+                    -
                     -#define COS3_0 FIXHR(0.54119610014619698439/2)
                     -#define COS3_1 FIXHR(1.30656296487637652785/4)
+                    -
                     -#define COS4_0 FIXHR(0.70710678118654752439/2)
+                    -
                     -/* butterfly operator */
                     -#define BF(a, b, c, s)\
                     -{\
                     -    tmp0 = val##a + val##b;\
                     -    tmp1 = val##a - val##b;\
                     -    val##a = tmp0;\
                     -    val##b = MULH3(tmp1, c, 1<<(s));\
                     -}
+                    -
                     -#define BF0(a, b, c, s)\
                     -{\
                     -    tmp0 = tab[a] + tab[b];\
                     -    tmp1 = tab[a] - tab[b];\
                     -    val##a = tmp0;\
                     -    val##b = MULH3(tmp1, c, 1<<(s));\
                     -}
+                    -
                     -#define BF1(a, b, c, d)\
                     -{\
                     -    BF(a, b, COS4_0, 1);\
                     -    BF(c, d,-COS4_0, 1);\
                     -    val##c += val##d;\
                     -}
+                    -
                     -#define BF2(a, b, c, d)\
                     -{\
                     -    BF(a, b, COS4_0, 1);\
                     -    BF(c, d,-COS4_0, 1);\
                     -    val##c += val##d;\
                     -    val##a += val##c;\
                     -    val##c += val##b;\
                     -    val##b += val##d;\
                     -}
+                    -
                     -#define ADD(a, b) val##a += val##b
+                    -
                     -/* DCT32 without 1/sqrt(2) coef zero scaling. */
                     -void dct32(INTFLOAT *out, const INTFLOAT *tab)
                     -{
                     -    INTFLOAT tmp0, tmp1;
+                    -
                     -    INTFLOAT val0 , val1 , val2 , val3 , val4 , val5 , val6 , val7 ,
                     -             val8 , val9 , val10, val11, val12, val13, val14, val15,
                     -             val16, val17, val18, val19, val20, val21, val22, val23,
                     -             val24, val25, val26, val27, val28, val29, val30, val31;
+                    -
                     -    /* pass 1 */
                     -    BF0( 0, 31, COS0_0 , 1);
                     -    BF0(15, 16, COS0_15, 5);
                     -    /* pass 2 */
                     -    BF( 0, 15, COS1_0 , 1);
                     -    BF(16, 31,-COS1_0 , 1);
                     -    /* pass 1 */
                     -    BF0( 7, 24, COS0_7 , 1);
                     -    BF0( 8, 23, COS0_8 , 1);
                     -    /* pass 2 */
                     -    BF( 7,  8, COS1_7 , 4);
                     -    BF(23, 24,-COS1_7 , 4);
                     -    /* pass 3 */
                     -    BF( 0,  7, COS2_0 , 1);
                     -    BF( 8, 15,-COS2_0 , 1);
                     -    BF(16, 23, COS2_0 , 1);
                     -    BF(24, 31,-COS2_0 , 1);
                     -    /* pass 1 */
                     -    BF0( 3, 28, COS0_3 , 1);
                     -    BF0(12, 19, COS0_12, 2);
                     -    /* pass 2 */
                     -    BF( 3, 12, COS1_3 , 1);
                     -    BF(19, 28,-COS1_3 , 1);
                     -    /* pass 1 */
                     -    BF0( 4, 27, COS0_4 , 1);
                     -    BF0(11, 20, COS0_11, 2);
                     -    /* pass 2 */
                     -    BF( 4, 11, COS1_4 , 1);
                     -    BF(20, 27,-COS1_4 , 1);
                     -    /* pass 3 */
                     -    BF( 3,  4, COS2_3 , 3);
                     -    BF(11, 12,-COS2_3 , 3);
                     -    BF(19, 20, COS2_3 , 3);
                     -    BF(27, 28,-COS2_3 , 3);
                     -    /* pass 4 */
                     -    BF( 0,  3, COS3_0 , 1);
                     -    BF( 4,  7,-COS3_0 , 1);
                     -    BF( 8, 11, COS3_0 , 1);
                     -    BF(12, 15,-COS3_0 , 1);
                     -    BF(16, 19, COS3_0 , 1);
                     -    BF(20, 23,-COS3_0 , 1);
                     -    BF(24, 27, COS3_0 , 1);
                     -    BF(28, 31,-COS3_0 , 1);
+                    -
+                    -
+                    -
                     -    /* pass 1 */
                     -    BF0( 1, 30, COS0_1 , 1);
                     -    BF0(14, 17, COS0_14, 3);
                     -    /* pass 2 */
                     -    BF( 1, 14, COS1_1 , 1);
                     -    BF(17, 30,-COS1_1 , 1);
                     -    /* pass 1 */
                     -    BF0( 6, 25, COS0_6 , 1);
                     -    BF0( 9, 22, COS0_9 , 1);
                     -    /* pass 2 */
                     -    BF( 6,  9, COS1_6 , 2);
                     -    BF(22, 25,-COS1_6 , 2);
                     -    /* pass 3 */
                     -    BF( 1,  6, COS2_1 , 1);
                     -    BF( 9, 14,-COS2_1 , 1);
                     -    BF(17, 22, COS2_1 , 1);
                     -    BF(25, 30,-COS2_1 , 1);
+                    -
                     -    /* pass 1 */
                     -    BF0( 2, 29, COS0_2 , 1);
                     -    BF0(13, 18, COS0_13, 3);
                     -    /* pass 2 */
                     -    BF( 2, 13, COS1_2 , 1);
                     -    BF(18, 29,-COS1_2 , 1);
                     -    /* pass 1 */
                     -    BF0( 5, 26, COS0_5 , 1);
                     -    BF0(10, 21, COS0_10, 1);
                     -    /* pass 2 */
                     -    BF( 5, 10, COS1_5 , 2);
                     -    BF(21, 26,-COS1_5 , 2);
                     -    /* pass 3 */
                     -    BF( 2,  5, COS2_2 , 1);
                     -    BF(10, 13,-COS2_2 , 1);
                     -    BF(18, 21, COS2_2 , 1);
                     -    BF(26, 29,-COS2_2 , 1);
                     -    /* pass 4 */
                     -    BF( 1,  2, COS3_1 , 2);
                     -    BF( 5,  6,-COS3_1 , 2);
                     -    BF( 9, 10, COS3_1 , 2);
                     -    BF(13, 14,-COS3_1 , 2);
                     -    BF(17, 18, COS3_1 , 2);
                     -    BF(21, 22,-COS3_1 , 2);
                     -    BF(25, 26, COS3_1 , 2);
                     -    BF(29, 30,-COS3_1 , 2);
+                    -
                     -    /* pass 5 */
                     -    BF1( 0,  1,  2,  3);
                     -    BF2( 4,  5,  6,  7);
                     -    BF1( 8,  9, 10, 11);
                     -    BF2(12, 13, 14, 15);
                     -    BF1(16, 17, 18, 19);
                     -    BF2(20, 21, 22, 23);
                     -    BF1(24, 25, 26, 27);
                     -    BF2(28, 29, 30, 31);
+                    -
                     -    /* pass 6 */
+                    -
                     -    ADD( 8, 12);
                     -    ADD(12, 10);
                     -    ADD(10, 14);
                     -    ADD(14,  9);
                     -    ADD( 9, 13);
                     -    ADD(13, 11);
                     -    ADD(11, 15);
+                    -
                     -    out[ 0] = val0;
                     -    out[16] = val1;
                     -    out[ 8] = val2;
                     -    out[24] = val3;
                     -    out[ 4] = val4;
                     -    out[20] = val5;
                     -    out[12] = val6;
                     -    out[28] = val7;
                     -    out[ 2] = val8;
                     -    out[18] = val9;
                     -    out[10] = val10;
                     -    out[26] = val11;
                     -    out[ 6] = val12;
                     -    out[22] = val13;
                     -    out[14] = val14;
                     -    out[30] = val15;
+                    -
                     -    ADD(24, 28);
                     -    ADD(28, 26);
                     -    ADD(26, 30);
                     -    ADD(30, 25);
                     -    ADD(25, 29);
                     -    ADD(29, 27);
                     -    ADD(27, 31);
+                    -
                     -    out[ 1] = val16 + val24;
                     -    out[17] = val17 + val25;
                     -    out[ 9] = val18 + val26;
                     -    out[25] = val19 + val27;
                     -    out[ 5] = val20 + val28;
                     -    out[21] = val21 + val29;
                     -    out[13] = val22 + val30;
                     -    out[29] = val23 + val31;
                     -    out[ 3] = val24 + val20;
                     -    out[19] = val25 + val21;
                     -    out[11] = val26 + val22;
                     -    out[27] = val27 + val23;
                     -    out[ 7] = val28 + val18;
                     -    out[23] = val29 + val19;
                     -    out[15] = val30 + val17;
                     -    out[31] = val31;
                     -}

libavcodec/dct32_fixed.c

History View file @ ac0e03b

@@ -17,4 +17,4 @@
                       */
                      #define DCT32_FLOAT 0
                     -#include "dct32.c"
                     +#include "dct32_template.c"

libavcodec/dct32_float.c

History View file @ ac0e03b

@@ -17,4 +17,4 @@
                       */
                      #define DCT32_FLOAT 1
                     -#include "dct32.c"
                     +#include "dct32_template.c"

libavcodec/dct32_template.c

History View file @ ac0e03b

                     new file mode 100644
@@ -0,0 +1,276 @@
                     +/*
                     + * Template for the Discrete Cosine Transform for 32 samples
                     + * Copyright (c) 2001, 2002 Fabrice Bellard
                     + *
                     + * This file is part of Libav.
                     + *
                     + * Libav is free software; you can redistribute it and/or
                     + * modify it under the terms of the GNU Lesser General Public
                     + * License as published by the Free Software Foundation; either
                     + * version 2.1 of the License, or (at your option) any later version.
                     + *
                     + * Libav is distributed in the hope that it will be useful,
                     + * but WITHOUT ANY WARRANTY; without even the implied warranty of
                     + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     + * Lesser General Public License for more details.
                     + *
                     + * You should have received a copy of the GNU Lesser General Public
                     + * License along with Libav; if not, write to the Free Software
                     + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                     + */
+                    +
                     +#include "dct32.h"
                     +#include "mathops.h"
+                    +
                     +#if DCT32_FLOAT
                     +#   define dct32 ff_dct32_float
                     +#   define FIXHR(x)       ((float)(x))
                     +#   define MULH3(x, y, s) ((s)*(y)*(x))
                     +#   define INTFLOAT float
                     +#else
                     +#   define dct32 ff_dct32_fixed
                     +#   define FIXHR(a)       ((int)((a) * (1LL<<32) + 0.5))
                     +#   define MULH3(x, y, s) MULH((s)*(x), y)
                     +#   define INTFLOAT int
                     +#endif
+                    +
+                    +
                     +/* tab[i][j] = 1.0 / (2.0 * cos(pi*(2*k+1) / 2^(6 - j))) */
+                    +
                     +/* cos(i*pi/64) */
+                    +
                     +#define COS0_0  FIXHR(0.50060299823519630134/2)
                     +#define COS0_1  FIXHR(0.50547095989754365998/2)
                     +#define COS0_2  FIXHR(0.51544730992262454697/2)
                     +#define COS0_3  FIXHR(0.53104259108978417447/2)
                     +#define COS0_4  FIXHR(0.55310389603444452782/2)
                     +#define COS0_5  FIXHR(0.58293496820613387367/2)
                     +#define COS0_6  FIXHR(0.62250412303566481615/2)
                     +#define COS0_7  FIXHR(0.67480834145500574602/2)
                     +#define COS0_8  FIXHR(0.74453627100229844977/2)
                     +#define COS0_9  FIXHR(0.83934964541552703873/2)
                     +#define COS0_10 FIXHR(0.97256823786196069369/2)
                     +#define COS0_11 FIXHR(1.16943993343288495515/4)
                     +#define COS0_12 FIXHR(1.48416461631416627724/4)
                     +#define COS0_13 FIXHR(2.05778100995341155085/8)
                     +#define COS0_14 FIXHR(3.40760841846871878570/8)
                     +#define COS0_15 FIXHR(10.19000812354805681150/32)
+                    +
                     +#define COS1_0 FIXHR(0.50241928618815570551/2)
                     +#define COS1_1 FIXHR(0.52249861493968888062/2)
                     +#define COS1_2 FIXHR(0.56694403481635770368/2)
                     +#define COS1_3 FIXHR(0.64682178335999012954/2)
                     +#define COS1_4 FIXHR(0.78815462345125022473/2)
                     +#define COS1_5 FIXHR(1.06067768599034747134/4)
                     +#define COS1_6 FIXHR(1.72244709823833392782/4)
                     +#define COS1_7 FIXHR(5.10114861868916385802/16)
+                    +
                     +#define COS2_0 FIXHR(0.50979557910415916894/2)
                     +#define COS2_1 FIXHR(0.60134488693504528054/2)
                     +#define COS2_2 FIXHR(0.89997622313641570463/2)
                     +#define COS2_3 FIXHR(2.56291544774150617881/8)
+                    +
                     +#define COS3_0 FIXHR(0.54119610014619698439/2)
                     +#define COS3_1 FIXHR(1.30656296487637652785/4)
+                    +
                     +#define COS4_0 FIXHR(0.70710678118654752439/2)
+                    +
                     +/* butterfly operator */
                     +#define BF(a, b, c, s)\
                     +{\
                     +    tmp0 = val##a + val##b;\
                     +    tmp1 = val##a - val##b;\
                     +    val##a = tmp0;\
                     +    val##b = MULH3(tmp1, c, 1<<(s));\
                     +}
+                    +
                     +#define BF0(a, b, c, s)\
                     +{\
                     +    tmp0 = tab[a] + tab[b];\
                     +    tmp1 = tab[a] - tab[b];\
                     +    val##a = tmp0;\
                     +    val##b = MULH3(tmp1, c, 1<<(s));\
                     +}
+                    +
                     +#define BF1(a, b, c, d)\
                     +{\
                     +    BF(a, b, COS4_0, 1);\
                     +    BF(c, d,-COS4_0, 1);\
                     +    val##c += val##d;\
                     +}
+                    +
                     +#define BF2(a, b, c, d)\
                     +{\
                     +    BF(a, b, COS4_0, 1);\
                     +    BF(c, d,-COS4_0, 1);\
                     +    val##c += val##d;\
                     +    val##a += val##c;\
                     +    val##c += val##b;\
                     +    val##b += val##d;\
                     +}
+                    +
                     +#define ADD(a, b) val##a += val##b
+                    +
                     +/* DCT32 without 1/sqrt(2) coef zero scaling. */
                     +void dct32(INTFLOAT *out, const INTFLOAT *tab)
                     +{
                     +    INTFLOAT tmp0, tmp1;
+                    +
                     +    INTFLOAT val0 , val1 , val2 , val3 , val4 , val5 , val6 , val7 ,
                     +             val8 , val9 , val10, val11, val12, val13, val14, val15,
                     +             val16, val17, val18, val19, val20, val21, val22, val23,
                     +             val24, val25, val26, val27, val28, val29, val30, val31;
+                    +
                     +    /* pass 1 */
                     +    BF0( 0, 31, COS0_0 , 1);
                     +    BF0(15, 16, COS0_15, 5);
                     +    /* pass 2 */
                     +    BF( 0, 15, COS1_0 , 1);
                     +    BF(16, 31,-COS1_0 , 1);
                     +    /* pass 1 */
                     +    BF0( 7, 24, COS0_7 , 1);
                     +    BF0( 8, 23, COS0_8 , 1);
                     +    /* pass 2 */
                     +    BF( 7,  8, COS1_7 , 4);
                     +    BF(23, 24,-COS1_7 , 4);
                     +    /* pass 3 */
                     +    BF( 0,  7, COS2_0 , 1);
                     +    BF( 8, 15,-COS2_0 , 1);
                     +    BF(16, 23, COS2_0 , 1);
                     +    BF(24, 31,-COS2_0 , 1);
                     +    /* pass 1 */
                     +    BF0( 3, 28, COS0_3 , 1);
                     +    BF0(12, 19, COS0_12, 2);
                     +    /* pass 2 */
                     +    BF( 3, 12, COS1_3 , 1);
                     +    BF(19, 28,-COS1_3 , 1);
                     +    /* pass 1 */
                     +    BF0( 4, 27, COS0_4 , 1);
                     +    BF0(11, 20, COS0_11, 2);
                     +    /* pass 2 */
                     +    BF( 4, 11, COS1_4 , 1);
                     +    BF(20, 27,-COS1_4 , 1);
                     +    /* pass 3 */
                     +    BF( 3,  4, COS2_3 , 3);
                     +    BF(11, 12,-COS2_3 , 3);
                     +    BF(19, 20, COS2_3 , 3);
                     +    BF(27, 28,-COS2_3 , 3);
                     +    /* pass 4 */
                     +    BF( 0,  3, COS3_0 , 1);
                     +    BF( 4,  7,-COS3_0 , 1);
                     +    BF( 8, 11, COS3_0 , 1);
                     +    BF(12, 15,-COS3_0 , 1);
                     +    BF(16, 19, COS3_0 , 1);
                     +    BF(20, 23,-COS3_0 , 1);
                     +    BF(24, 27, COS3_0 , 1);
                     +    BF(28, 31,-COS3_0 , 1);
+                    +
+                    +
+                    +
                     +    /* pass 1 */
                     +    BF0( 1, 30, COS0_1 , 1);
                     +    BF0(14, 17, COS0_14, 3);
                     +    /* pass 2 */
                     +    BF( 1, 14, COS1_1 , 1);
                     +    BF(17, 30,-COS1_1 , 1);
                     +    /* pass 1 */
                     +    BF0( 6, 25, COS0_6 , 1);
                     +    BF0( 9, 22, COS0_9 , 1);
                     +    /* pass 2 */
                     +    BF( 6,  9, COS1_6 , 2);
                     +    BF(22, 25,-COS1_6 , 2);
                     +    /* pass 3 */
                     +    BF( 1,  6, COS2_1 , 1);
                     +    BF( 9, 14,-COS2_1 , 1);
                     +    BF(17, 22, COS2_1 , 1);
                     +    BF(25, 30,-COS2_1 , 1);
+                    +
                     +    /* pass 1 */
                     +    BF0( 2, 29, COS0_2 , 1);
                     +    BF0(13, 18, COS0_13, 3);
                     +    /* pass 2 */
                     +    BF( 2, 13, COS1_2 , 1);
                     +    BF(18, 29,-COS1_2 , 1);
                     +    /* pass 1 */
                     +    BF0( 5, 26, COS0_5 , 1);
                     +    BF0(10, 21, COS0_10, 1);
                     +    /* pass 2 */
                     +    BF( 5, 10, COS1_5 , 2);
                     +    BF(21, 26,-COS1_5 , 2);
                     +    /* pass 3 */
                     +    BF( 2,  5, COS2_2 , 1);
                     +    BF(10, 13,-COS2_2 , 1);
                     +    BF(18, 21, COS2_2 , 1);
                     +    BF(26, 29,-COS2_2 , 1);
                     +    /* pass 4 */
                     +    BF( 1,  2, COS3_1 , 2);
                     +    BF( 5,  6,-COS3_1 , 2);
                     +    BF( 9, 10, COS3_1 , 2);
                     +    BF(13, 14,-COS3_1 , 2);
                     +    BF(17, 18, COS3_1 , 2);
                     +    BF(21, 22,-COS3_1 , 2);
                     +    BF(25, 26, COS3_1 , 2);
                     +    BF(29, 30,-COS3_1 , 2);
+                    +
                     +    /* pass 5 */
                     +    BF1( 0,  1,  2,  3);
                     +    BF2( 4,  5,  6,  7);
                     +    BF1( 8,  9, 10, 11);
                     +    BF2(12, 13, 14, 15);
                     +    BF1(16, 17, 18, 19);
                     +    BF2(20, 21, 22, 23);
                     +    BF1(24, 25, 26, 27);
                     +    BF2(28, 29, 30, 31);
+                    +
                     +    /* pass 6 */
+                    +
                     +    ADD( 8, 12);
                     +    ADD(12, 10);
                     +    ADD(10, 14);
                     +    ADD(14,  9);
                     +    ADD( 9, 13);
                     +    ADD(13, 11);
                     +    ADD(11, 15);
+                    +
                     +    out[ 0] = val0;
                     +    out[16] = val1;
                     +    out[ 8] = val2;
                     +    out[24] = val3;
                     +    out[ 4] = val4;
                     +    out[20] = val5;
                     +    out[12] = val6;
                     +    out[28] = val7;
                     +    out[ 2] = val8;
                     +    out[18] = val9;
                     +    out[10] = val10;
                     +    out[26] = val11;
                     +    out[ 6] = val12;
                     +    out[22] = val13;
                     +    out[14] = val14;
                     +    out[30] = val15;
+                    +
                     +    ADD(24, 28);
                     +    ADD(28, 26);
                     +    ADD(26, 30);
                     +    ADD(30, 25);
                     +    ADD(25, 29);
                     +    ADD(29, 27);
                     +    ADD(27, 31);
+                    +
                     +    out[ 1] = val16 + val24;
                     +    out[17] = val17 + val25;
                     +    out[ 9] = val18 + val26;
                     +    out[25] = val19 + val27;
                     +    out[ 5] = val20 + val28;
                     +    out[21] = val21 + val29;
                     +    out[13] = val22 + val30;
                     +    out[29] = val23 + val31;
                     +    out[ 3] = val24 + val20;
                     +    out[19] = val25 + val21;
                     +    out[11] = val26 + val22;
                     +    out[27] = val27 + val23;
                     +    out[ 7] = val28 + val18;
                     +    out[23] = val29 + val19;
                     +    out[15] = val30 + val17;
                     +    out[31] = val31;
                     +}

libavcodec/fft.c

History View file @ ac0e03b

                     deleted file mode 100644
@@ -1,352 +0,0 @@
                     -/*
                     - * FFT/IFFT transforms
                     - * Copyright (c) 2008 Loren Merritt
                     - * Copyright (c) 2002 Fabrice Bellard
                     - * Partly based on libdjbfft by D. J. Bernstein
                     - *
                     - * This file is part of Libav.
                     - *
                     - * Libav is free software; you can redistribute it and/or
                     - * modify it under the terms of the GNU Lesser General Public
                     - * License as published by the Free Software Foundation; either
                     - * version 2.1 of the License, or (at your option) any later version.
                     - *
                     - * Libav is distributed in the hope that it will be useful,
                     - * but WITHOUT ANY WARRANTY; without even the implied warranty of
                     - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     - * Lesser General Public License for more details.
                     - *
                     - * You should have received a copy of the GNU Lesser General Public
                     - * License along with Libav; if not, write to the Free Software
                     - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                     - */
+                    -
                     -/**
                     - * @file
                     - * FFT/IFFT transforms.
                     - */
+                    -
                     -#include <stdlib.h>
                     -#include <string.h>
                     -#include "libavutil/mathematics.h"
                     -#include "fft.h"
                     -#include "fft-internal.h"
+                    -
                     -/* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */
                     -#if !CONFIG_HARDCODED_TABLES
                     -COSTABLE(16);
                     -COSTABLE(32);
                     -COSTABLE(64);
                     -COSTABLE(128);
                     -COSTABLE(256);
                     -COSTABLE(512);
                     -COSTABLE(1024);
                     -COSTABLE(2048);
                     -COSTABLE(4096);
                     -COSTABLE(8192);
                     -COSTABLE(16384);
                     -COSTABLE(32768);
                     -COSTABLE(65536);
                     -#endif
                     -COSTABLE_CONST FFTSample * const FFT_NAME(ff_cos_tabs)[] = {
                     -    NULL, NULL, NULL, NULL,
                     -    FFT_NAME(ff_cos_16),
                     -    FFT_NAME(ff_cos_32),
                     -    FFT_NAME(ff_cos_64),
                     -    FFT_NAME(ff_cos_128),
                     -    FFT_NAME(ff_cos_256),
                     -    FFT_NAME(ff_cos_512),
                     -    FFT_NAME(ff_cos_1024),
                     -    FFT_NAME(ff_cos_2048),
                     -    FFT_NAME(ff_cos_4096),
                     -    FFT_NAME(ff_cos_8192),
                     -    FFT_NAME(ff_cos_16384),
                     -    FFT_NAME(ff_cos_32768),
                     -    FFT_NAME(ff_cos_65536),
                     -};
+                    -
                     -static void fft_permute_c(FFTContext *s, FFTComplex *z);
                     -static void fft_calc_c(FFTContext *s, FFTComplex *z);
+                    -
                     -static int split_radix_permutation(int i, int n, int inverse)
                     -{
                     -    int m;
                     -    if(n <= 2) return i&1;
                     -    m = n >> 1;
                     -    if(!(i&m))            return split_radix_permutation(i, m, inverse)*2;
                     -    m >>= 1;
                     -    if(inverse == !(i&m)) return split_radix_permutation(i, m, inverse)*4 + 1;
                     -    else                  return split_radix_permutation(i, m, inverse)*4 - 1;
                     -}
+                    -
                     -av_cold void ff_init_ff_cos_tabs(int index)
                     -{
                     -#if !CONFIG_HARDCODED_TABLES
                     -    int i;
                     -    int m = 1<<index;
                     -    double freq = 2*M_PI/m;
                     -    FFTSample *tab = FFT_NAME(ff_cos_tabs)[index];
                     -    for(i=0; i<=m/4; i++)
                     -        tab[i] = FIX15(cos(i*freq));
                     -    for(i=1; i<m/4; i++)
                     -        tab[m/2-i] = tab[i];
                     -#endif
                     -}
+                    -
                     -static const int avx_tab[] = {
                     -    0, 4, 1, 5, 8, 12, 9, 13, 2, 6, 3, 7, 10, 14, 11, 15
                     -};
+                    -
                     -static int is_second_half_of_fft32(int i, int n)
                     -{
                     -    if (n <= 32)
                     -        return i >= 16;
                     -    else if (i < n/2)
                     -        return is_second_half_of_fft32(i, n/2);
                     -    else if (i < 3*n/4)
                     -        return is_second_half_of_fft32(i - n/2, n/4);
                     -    else
                     -        return is_second_half_of_fft32(i - 3*n/4, n/4);
                     -}
+                    -
                     -static av_cold void fft_perm_avx(FFTContext *s)
                     -{
                     -    int i;
                     -    int n = 1 << s->nbits;
+                    -
                     -    for (i = 0; i < n; i += 16) {
                     -        int k;
                     -        if (is_second_half_of_fft32(i, n)) {
                     -            for (k = 0; k < 16; k++)
                     -                s->revtab[-split_radix_permutation(i + k, n, s->inverse) & (n - 1)] =
                     -                    i + avx_tab[k];
+                    -
                     -        } else {
                     -            for (k = 0; k < 16; k++) {
                     -                int j = i + k;
                     -                j = (j & ~7) | ((j >> 1) & 3) | ((j << 2) & 4);
                     -                s->revtab[-split_radix_permutation(i + k, n, s->inverse) & (n - 1)] = j;
                     -            }
                     -        }
                     -    }
                     -}
+                    -
                     -av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
                     -{
                     -    int i, j, n;
+                    -
                     -    if (nbits < 2 || nbits > 16)
                     -        goto fail;
                     -    s->nbits = nbits;
                     -    n = 1 << nbits;
+                    -
                     -    s->revtab = av_malloc(n * sizeof(uint16_t));
                     -    if (!s->revtab)
                     -        goto fail;
                     -    s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
                     -    if (!s->tmp_buf)
                     -        goto fail;
                     -    s->inverse = inverse;
                     -    s->fft_permutation = FF_FFT_PERM_DEFAULT;
+                    -
                     -    s->fft_permute = fft_permute_c;
                     -    s->fft_calc    = fft_calc_c;
                     -#if CONFIG_MDCT
                     -    s->imdct_calc  = ff_imdct_calc_c;
                     -    s->imdct_half  = ff_imdct_half_c;
                     -    s->mdct_calc   = ff_mdct_calc_c;
                     -#endif
+                    -
                     -#if CONFIG_FFT_FLOAT
                     -    if (ARCH_ARM)     ff_fft_init_arm(s);
                     -    if (ARCH_PPC)     ff_fft_init_ppc(s);
                     -    if (ARCH_X86)     ff_fft_init_x86(s);
                     -    if (CONFIG_MDCT)  s->mdct_calcw = s->mdct_calc;
                     -#else
                     -    if (CONFIG_MDCT)  s->mdct_calcw = ff_mdct_calcw_c;
                     -    if (ARCH_ARM)     ff_fft_fixed_init_arm(s);
                     -#endif
+                    -
                     -    for(j=4; j<=nbits; j++) {
                     -        ff_init_ff_cos_tabs(j);
                     -    }
+                    -
                     -    if (s->fft_permutation == FF_FFT_PERM_AVX) {
                     -        fft_perm_avx(s);
                     -    } else {
                     -        for(i=0; i<n; i++) {
                     -            int j = i;
                     -            if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS)
                     -                j = (j&~3) | ((j>>1)&1) | ((j<<1)&2);
                     -            s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = j;
                     -        }
                     -    }
+                    -
                     -    return 0;
                     - fail:
                     -    av_freep(&s->revtab);
                     -    av_freep(&s->tmp_buf);
                     -    return -1;
                     -}
+                    -
                     -static void fft_permute_c(FFTContext *s, FFTComplex *z)
                     -{
                     -    int j, np;
                     -    const uint16_t *revtab = s->revtab;
                     -    np = 1 << s->nbits;
                     -    /* TODO: handle split-radix permute in a more optimal way, probably in-place */
                     -    for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j];
                     -    memcpy(z, s->tmp_buf, np * sizeof(FFTComplex));
                     -}
+                    -
                     -av_cold void ff_fft_end(FFTContext *s)
                     -{
                     -    av_freep(&s->revtab);
                     -    av_freep(&s->tmp_buf);
                     -}
+                    -
                     -#define BUTTERFLIES(a0,a1,a2,a3) {\
                     -    BF(t3, t5, t5, t1);\
                     -    BF(a2.re, a0.re, a0.re, t5);\
                     -    BF(a3.im, a1.im, a1.im, t3);\
                     -    BF(t4, t6, t2, t6);\
                     -    BF(a3.re, a1.re, a1.re, t4);\
                     -    BF(a2.im, a0.im, a0.im, t6);\
                     -}
+                    -
                     -// force loading all the inputs before storing any.
                     -// this is slightly slower for small data, but avoids store->load aliasing
                     -// for addresses separated by large powers of 2.
                     -#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\
                     -    FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\
                     -    BF(t3, t5, t5, t1);\
                     -    BF(a2.re, a0.re, r0, t5);\
                     -    BF(a3.im, a1.im, i1, t3);\
                     -    BF(t4, t6, t2, t6);\
                     -    BF(a3.re, a1.re, r1, t4);\
                     -    BF(a2.im, a0.im, i0, t6);\
                     -}
+                    -
                     -#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\
                     -    CMUL(t1, t2, a2.re, a2.im, wre, -wim);\
                     -    CMUL(t5, t6, a3.re, a3.im, wre,  wim);\
                     -    BUTTERFLIES(a0,a1,a2,a3)\
                     -}
+                    -
                     -#define TRANSFORM_ZERO(a0,a1,a2,a3) {\
                     -    t1 = a2.re;\
                     -    t2 = a2.im;\
                     -    t5 = a3.re;\
                     -    t6 = a3.im;\
                     -    BUTTERFLIES(a0,a1,a2,a3)\
                     -}
+                    -
                     -/* z[0...8n-1], w[1...2n-1] */
                     -#define PASS(name)\
                     -static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\
                     -{\
                     -    FFTDouble t1, t2, t3, t4, t5, t6;\
                     -    int o1 = 2*n;\
                     -    int o2 = 4*n;\
                     -    int o3 = 6*n;\
                     -    const FFTSample *wim = wre+o1;\
                     -    n--;\
                     -\
                     -    TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\
                     -    TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
                     -    do {\
                     -        z += 2;\
                     -        wre += 2;\
                     -        wim -= 2;\
                     -        TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\
                     -        TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
                     -    } while(--n);\
                     -}
+                    -
                     -PASS(pass)
                     -#undef BUTTERFLIES
                     -#define BUTTERFLIES BUTTERFLIES_BIG
                     -PASS(pass_big)
+                    -
                     -#define DECL_FFT(n,n2,n4)\
                     -static void fft##n(FFTComplex *z)\
                     -{\
                     -    fft##n2(z);\
                     -    fft##n4(z+n4*2);\
                     -    fft##n4(z+n4*3);\
                     -    pass(z,FFT_NAME(ff_cos_##n),n4/2);\
                     -}
+                    -
                     -static void fft4(FFTComplex *z)
                     -{
                     -    FFTDouble t1, t2, t3, t4, t5, t6, t7, t8;
+                    -
                     -    BF(t3, t1, z[0].re, z[1].re);
                     -    BF(t8, t6, z[3].re, z[2].re);
                     -    BF(z[2].re, z[0].re, t1, t6);
                     -    BF(t4, t2, z[0].im, z[1].im);
                     -    BF(t7, t5, z[2].im, z[3].im);
                     -    BF(z[3].im, z[1].im, t4, t8);
                     -    BF(z[3].re, z[1].re, t3, t7);
                     -    BF(z[2].im, z[0].im, t2, t5);
                     -}
+                    -
                     -static void fft8(FFTComplex *z)
                     -{
                     -    FFTDouble t1, t2, t3, t4, t5, t6;
+                    -
                     -    fft4(z);
+                    -
                     -    BF(t1, z[5].re, z[4].re, -z[5].re);
                     -    BF(t2, z[5].im, z[4].im, -z[5].im);
                     -    BF(t5, z[7].re, z[6].re, -z[7].re);
                     -    BF(t6, z[7].im, z[6].im, -z[7].im);
+                    -
                     -    BUTTERFLIES(z[0],z[2],z[4],z[6]);
                     -    TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf);
                     -}
+                    -
                     -#if !CONFIG_SMALL
                     -static void fft16(FFTComplex *z)
                     -{
                     -    FFTDouble t1, t2, t3, t4, t5, t6;
                     -    FFTSample cos_16_1 = FFT_NAME(ff_cos_16)[1];
                     -    FFTSample cos_16_3 = FFT_NAME(ff_cos_16)[3];
+                    -
                     -    fft8(z);
                     -    fft4(z+8);
                     -    fft4(z+12);
+                    -
                     -    TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
                     -    TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf);
                     -    TRANSFORM(z[1],z[5],z[9],z[13],cos_16_1,cos_16_3);
                     -    TRANSFORM(z[3],z[7],z[11],z[15],cos_16_3,cos_16_1);
                     -}
                     -#else
                     -DECL_FFT(16,8,4)
                     -#endif
                     -DECL_FFT(32,16,8)
                     -DECL_FFT(64,32,16)
                     -DECL_FFT(128,64,32)
                     -DECL_FFT(256,128,64)
                     -DECL_FFT(512,256,128)
                     -#if !CONFIG_SMALL
                     -#define pass pass_big
                     -#endif
                     -DECL_FFT(1024,512,256)
                     -DECL_FFT(2048,1024,512)
                     -DECL_FFT(4096,2048,1024)
                     -DECL_FFT(8192,4096,2048)
                     -DECL_FFT(16384,8192,4096)
                     -DECL_FFT(32768,16384,8192)
                     -DECL_FFT(65536,32768,16384)
+                    -
                     -static void (* const fft_dispatch[])(FFTComplex*) = {
                     -    fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024,
                     -    fft2048, fft4096, fft8192, fft16384, fft32768, fft65536,
                     -};
+                    -
                     -static void fft_calc_c(FFTContext *s, FFTComplex *z)
                     -{
                     -    fft_dispatch[s->nbits-2](z);
                     -}

libavcodec/fft_fixed.c

History View file @ ac0e03b

@@ -17,4 +17,4 @@
                       */
                      #define CONFIG_FFT_FLOAT 0
                     -#include "fft.c"
                     +#include "fft_template.c"

libavcodec/fft_float.c

History View file @ ac0e03b

@@ -17,4 +17,4 @@
                       */
                      #define CONFIG_FFT_FLOAT 1
                     -#include "fft.c"
                     +#include "fft_template.c"

libavcodec/fft_template.c

History View file @ ac0e03b

                     new file mode 100644
@@ -0,0 +1,352 @@
                     +/*
                     + * FFT/IFFT transforms
                     + * Copyright (c) 2008 Loren Merritt
                     + * Copyright (c) 2002 Fabrice Bellard
                     + * Partly based on libdjbfft by D. J. Bernstein
                     + *
                     + * This file is part of Libav.
                     + *
                     + * Libav is free software; you can redistribute it and/or
                     + * modify it under the terms of the GNU Lesser General Public
                     + * License as published by the Free Software Foundation; either
                     + * version 2.1 of the License, or (at your option) any later version.
                     + *
                     + * Libav is distributed in the hope that it will be useful,
                     + * but WITHOUT ANY WARRANTY; without even the implied warranty of
                     + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     + * Lesser General Public License for more details.
                     + *
                     + * You should have received a copy of the GNU Lesser General Public
                     + * License along with Libav; if not, write to the Free Software
                     + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                     + */
+                    +
                     +/**
                     + * @file
                     + * FFT/IFFT transforms.
                     + */
+                    +
                     +#include <stdlib.h>
                     +#include <string.h>
                     +#include "libavutil/mathematics.h"
                     +#include "fft.h"
                     +#include "fft-internal.h"
+                    +
                     +/* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse.
                     + *
                     + * One table exists per transform size n = 16 ... 65536. Storage is declared
                     + * here through COSTABLE() only when CONFIG_HARDCODED_TABLES is off, in which
                     + * case ff_init_ff_cos_tabs() fills it at run time; with hardcoded tables the
                     + * arrays are presumably supplied by another translation unit (TODO confirm).
                     + */
                     +#if !CONFIG_HARDCODED_TABLES
                     +COSTABLE(16);
                     +COSTABLE(32);
                     +COSTABLE(64);
                     +COSTABLE(128);
                     +COSTABLE(256);
                     +COSTABLE(512);
                     +COSTABLE(1024);
                     +COSTABLE(2048);
                     +COSTABLE(4096);
                     +COSTABLE(8192);
                     +COSTABLE(16384);
                     +COSTABLE(32768);
                     +COSTABLE(65536);
                     +#endif
                     +COSTABLE_CONST FFTSample * const FFT_NAME(ff_cos_tabs)[] = { /* indexed by log2(n) */
                     +    NULL, NULL, NULL, NULL, /* indices 0..3: no table for n < 16 */
                     +    FFT_NAME(ff_cos_16),
                     +    FFT_NAME(ff_cos_32),
                     +    FFT_NAME(ff_cos_64),
                     +    FFT_NAME(ff_cos_128),
                     +    FFT_NAME(ff_cos_256),
                     +    FFT_NAME(ff_cos_512),
                     +    FFT_NAME(ff_cos_1024),
                     +    FFT_NAME(ff_cos_2048),
                     +    FFT_NAME(ff_cos_4096),
                     +    FFT_NAME(ff_cos_8192),
                     +    FFT_NAME(ff_cos_16384),
                     +    FFT_NAME(ff_cos_32768),
                     +    FFT_NAME(ff_cos_65536),
                     +};
+                    +
                     +static void fft_permute_c(FFTContext *s, FFTComplex *z); /* defined further down; installed as s->fft_permute by ff_fft_init() */
                     +static void fft_calc_c(FFTContext *s, FFTComplex *z);    /* defined further down; installed as s->fft_calc by ff_fft_init() */
+                    +
                     +static int split_radix_permutation(int i, int n, int inverse)
                     +{   /* Recursively compute the split-radix reordering value of index i for a
                     +     * transform of size n.
                     +     *
                     +     * The value may be negative (see the "*4 - 1" branch); the callers in
                     +     * this file negate it and mask with n-1 to obtain a revtab index.
                     +     * inverse is compared with the 0/1 result of !(i&m), so it decides which
                     +     * of the two quarter-size branches gets +1 and which gets -1.
                     +     */
                     +    int m;
                     +    if(n <= 2) return i&1;                /* recursion ends at size 2 */
                     +    m = n >> 1;
                     +    if(!(i&m))            return split_radix_permutation(i, m, inverse)*2; /* bit n/2 clear: half-size sub-problem */
                     +    m >>= 1;                              /* bit n/2 set: look at bit n/4 and use a quarter-size sub-problem */
                     +    if(inverse == !(i&m)) return split_radix_permutation(i, m, inverse)*4 + 1;
                     +    else                  return split_radix_permutation(i, m, inverse)*4 - 1;
                     +}
+                    +
                     +av_cold void ff_init_ff_cos_tabs(int index)
                     +{   /* Fill the cosine table for transform size 1 << index.
                     +     *
                     +     * Compiles to an empty function when CONFIG_HARDCODED_TABLES is set.
                     +     * index selects an entry of ff_cos_tabs[], whose first four slots are
                     +     * NULL, so it has to be at least 4; ff_fft_init() calls this for
                     +     * 4 .. nbits. FIX15() is defined elsewhere (presumably it converts the
                     +     * double to the FFTSample representation -- TODO confirm).
                     +     */
                     +#if !CONFIG_HARDCODED_TABLES
                     +    int i;
                     +    int m = 1<<index;                     /* transform size */
                     +    double freq = 2*M_PI/m;
                     +    FFTSample *tab = FFT_NAME(ff_cos_tabs)[index];
                     +    for(i=0; i<=m/4; i++)                 /* entries 0 .. m/4, both ends included */
                     +        tab[i] = FIX15(cos(i*freq));
                     +    for(i=1; i<m/4; i++)                  /* mirror entries 1 .. m/4-1 into m/2-1 .. m/4+1 */
                     +        tab[m/2-i] = tab[i];
                     +#endif
                     +}
+                    +
                     +static const int avx_tab[] = { /* offset stored for position k = 0..15 of a 16-entry group, see fft_perm_avx() */
                     +    0, 4, 1, 5, 8, 12, 9, 13, 2, 6, 3, 7, 10, 14, 11, 15
                     +};
+                    +
                     +static int is_second_half_of_fft32(int i, int n)
                     +{   /* Follow index i down through sub-transforms of size n/2 (first half),
                     +     * n/4 (third quarter) and n/4 (last quarter) -- the same split that
                     +     * DECL_FFT uses -- until the size is 32 or less, then return nonzero if
                     +     * the remaining local index is 16 or above.
                     +     */
                     +    if (n <= 32)
                     +        return i >= 16;
                     +    else if (i < n/2)
                     +        return is_second_half_of_fft32(i, n/2);
                     +    else if (i < 3*n/4)
                     +        return is_second_half_of_fft32(i - n/2, n/4);        /* rebase to the start of the third quarter */
                     +    else
                     +        return is_second_half_of_fft32(i - 3*n/4, n/4);      /* rebase to the start of the last quarter */
                     +}
+                    +
                     +static av_cold void fft_perm_avx(FFTContext *s)
                     +{   /* Fill s->revtab for the FF_FFT_PERM_AVX layout (called from
                     +     * ff_fft_init() when s->fft_permutation has that value).
                     +     *
                     +     * Indices are processed in groups of 16. The slot written is the same
                     +     * as in the default permutation (negated split-radix value masked with
                     +     * n-1); only the stored value differs per group.
                     +     */
                     +    int i;
                     +    int n = 1 << s->nbits;
+                    +
                     +    for (i = 0; i < n; i += 16) {
                     +        int k;
                     +        if (is_second_half_of_fft32(i, n)) {
                     +            for (k = 0; k < 16; k++)      /* store the group base plus the avx_tab offset */
                     +                s->revtab[-split_radix_permutation(i + k, n, s->inverse) & (n - 1)] =
                     +                    i + avx_tab[k];
+                    +
                     +        } else {
                     +            for (k = 0; k < 16; k++) {
                     +                int j = i + k;
                     +                j = (j & ~7) | ((j >> 1) & 3) | ((j << 2) & 4); /* low 3 bits: bits 2:1 move to 1:0, bit 0 moves to bit 2 */
                     +                s->revtab[-split_radix_permutation(i + k, n, s->inverse) & (n - 1)] = j;
                     +            }
                     +        }
                     +    }
                     +}
+                    +
                     +/**
                     + * Set up an FFT context for transforms of 1 << nbits complex points.
                     + *
                     + * Allocates the permutation table and the scratch buffer, installs the C
                     + * implementations, runs the per-architecture init hooks (which presumably
                     + * may replace the callbacks and s->fft_permutation), initializes the
                     + * cosine tables for sizes 16 .. 1 << nbits and fills the permutation table.
                     + *
                     + * @param s       context to initialize; its previous contents are ignored
                     + * @param nbits   log2 of the transform size, 2 .. 16
                     + * @param inverse stored in s->inverse and handed to split_radix_permutation()
                     + * @return 0 on success, -1 if nbits is out of range or an allocation failed;
                     + *         on failure s->revtab and s->tmp_buf are NULL
                     + */
                     +av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
                     +{
                     +    int i, j, n;
+                    +
                     +    /* The fail path frees both buffers unconditionally, so they must hold
                     +     * a defined value before the first "goto fail"; otherwise an
                     +     * uninitialized context would have garbage pointers freed. */
                     +    s->revtab  = NULL;
                     +    s->tmp_buf = NULL;
+                    +
                     +    if (nbits < 2 || nbits > 16)
                     +        goto fail;
                     +    s->nbits = nbits;
                     +    n = 1 << nbits;
+                    +
                     +    s->revtab = av_malloc(n * sizeof(uint16_t));
                     +    if (!s->revtab)
                     +        goto fail;
                     +    s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
                     +    if (!s->tmp_buf)
                     +        goto fail;
                     +    s->inverse = inverse;
                     +    s->fft_permutation = FF_FFT_PERM_DEFAULT;
+                    +
                     +    /* C defaults; the arch-specific hooks below run afterwards. */
                     +    s->fft_permute = fft_permute_c;
                     +    s->fft_calc    = fft_calc_c;
                     +#if CONFIG_MDCT
                     +    s->imdct_calc  = ff_imdct_calc_c;
                     +    s->imdct_half  = ff_imdct_half_c;
                     +    s->mdct_calc   = ff_mdct_calc_c;
                     +#endif
+                    +
                     +#if CONFIG_FFT_FLOAT
                     +    if (ARCH_ARM)     ff_fft_init_arm(s);
                     +    if (ARCH_PPC)     ff_fft_init_ppc(s);
                     +    if (ARCH_X86)     ff_fft_init_x86(s);
                     +    if (CONFIG_MDCT)  s->mdct_calcw = s->mdct_calc;
                     +#else
                     +    if (CONFIG_MDCT)  s->mdct_calcw = ff_mdct_calcw_c;
                     +    if (ARCH_ARM)     ff_fft_fixed_init_arm(s);
                     +#endif
+                    +
                     +    /* Tables exist only for sizes >= 16 (index 4 and up). */
                     +    for(j=4; j<=nbits; j++) {
                     +        ff_init_ff_cos_tabs(j);
                     +    }
+                    +
                     +    if (s->fft_permutation == FF_FFT_PERM_AVX) {
                     +        fft_perm_avx(s);
                     +    } else {
                     +        for(i=0; i<n; i++) {
                     +            int k = i;   /* value to store; no longer shadows the outer j */
                     +            if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS)
                     +                k = (k&~3) | ((k>>1)&1) | ((k<<1)&2);   /* swap the two lowest bits */
                     +            s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = k;
                     +        }
                     +    }
+                    +
                     +    return 0;
                     + fail:
                     +    av_freep(&s->revtab);
                     +    av_freep(&s->tmp_buf);
                     +    return -1;
                     +}
+                    +
                     +static void fft_permute_c(FFTContext *s, FFTComplex *z)
                     +{   /* Reorder the 1 << s->nbits elements of z according to s->revtab:
                     +     * element j is moved to position revtab[j]. The scatter goes through
                     +     * s->tmp_buf and the result is copied back over z.
                     +     */
                     +    int j, np;
                     +    const uint16_t *revtab = s->revtab;
                     +    np = 1 << s->nbits;
                     +    /* TODO: handle split-radix permute in a more optimal way, probably in-place */
                     +    for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j];
                     +    memcpy(z, s->tmp_buf, np * sizeof(FFTComplex));
                     +}
+                    +
                     +av_cold void ff_fft_end(FFTContext *s)
                     +{   /* Release the two buffers allocated by ff_fft_init(). They are freed
                     +     * with av_freep(), the same call ff_fft_init() uses on its fail path.
                     +     */
                     +    av_freep(&s->revtab);
                     +    av_freep(&s->tmp_buf);
                     +}
+                    +
                     +#define BUTTERFLIES(a0,a1,a2,a3) {/* updates a0..a3 in place; uses t1..t6 of the enclosing scope; BF() is defined elsewhere */\
                     +    BF(t3, t5, t5, t1);\
                     +    BF(a2.re, a0.re, a0.re, t5);/* a0.re is both an input and an output here */\
                     +    BF(a3.im, a1.im, a1.im, t3);\
                     +    BF(t4, t6, t2, t6);\
                     +    BF(a3.re, a1.re, a1.re, t4);\
                     +    BF(a2.im, a0.im, a0.im, t6);\
                     +}
+                    +
                     +// force loading all the inputs before storing any.
                     +// this is slightly slower for small data, but avoids store->load aliasing
                     +// for addresses separated by large powers of 2.
                     +#define BUTTERFLIES_BIG(a0,a1,a2,a3) {/* same BF sequence as BUTTERFLIES, reading a0/a1 via local copies */\
                     +    FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;/* snapshot a0 and a1 before anything is written */\
                     +    BF(t3, t5, t5, t1);\
                     +    BF(a2.re, a0.re, r0, t5);\
                     +    BF(a3.im, a1.im, i1, t3);\
                     +    BF(t4, t6, t2, t6);\
                     +    BF(a3.re, a1.re, r1, t4);\
                     +    BF(a2.im, a0.im, i0, t6);\
                     +}
+                    +
                     +#define TRANSFORM(a0,a1,a2,a3,wre,wim) {/* CMUL() is defined elsewhere; presumably a complex multiply -- TODO confirm */\
                     +    CMUL(t1, t2, a2.re, a2.im, wre, -wim);/* a2 with (wre, -wim) -> t1, t2 */\
                     +    CMUL(t5, t6, a3.re, a3.im, wre,  wim);/* a3 with (wre,  wim) -> t5, t6 */\
                     +    BUTTERFLIES(a0,a1,a2,a3)\
                     +}
+                    +
                     +#define TRANSFORM_ZERO(a0,a1,a2,a3) {/* like TRANSFORM, but a2 and a3 are taken as they are, without a CMUL */\
                     +    t1 = a2.re;\
                     +    t2 = a2.im;\
                     +    t5 = a3.re;\
                     +    t6 = a3.im;\
                     +    BUTTERFLIES(a0,a1,a2,a3)\
                     +}
+                    +
                     +/* z[0...8n-1], w[1...2n-1]
                     + *
                     + * PASS(name) defines "static void name(z, wre, n)". Each step combines
                     + * z[k], z[k+2n], z[k+4n] and z[k+6n] for k = 0 .. 2n-1; k = 0 uses
                     + * TRANSFORM_ZERO, every other k uses TRANSFORM. wre walks upwards and
                     + * wim (= wre + 2n) walks downwards, two values of k per loop iteration.
                     + * The loop is a do/while that pre-decrements n after an initial n--, so
                     + * n must be at least 2; the instantiations in this file pass n4/2, which
                     + * is never smaller than 2.
                     + */
                     +#define PASS(name)\
                     +static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\
                     +{\
                     +    FFTDouble t1, t2, t3, t4, t5, t6;\
                     +    int o1 = 2*n;\
                     +    int o2 = 4*n;\
                     +    int o3 = 6*n;\
                     +    const FFTSample *wim = wre+o1;\
                     +    n--;\
                     +\
                     +    TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\
                     +    TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
                     +    do {\
                     +        z += 2;\
                     +        wre += 2;\
                     +        wim -= 2;\
                     +        TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\
                     +        TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
                     +    } while(--n);\
                     +}
+                    +
                     +PASS(pass) /* expanded with the plain BUTTERFLIES defined above */
                     +#undef BUTTERFLIES
                     +#define BUTTERFLIES BUTTERFLIES_BIG /* applies to everything that follows, including the BUTTERFLIES/TRANSFORM uses in fft8 and fft16 */
                     +PASS(pass_big) /* same body, expanded with BUTTERFLIES_BIG */
+                    +
                     +#define DECL_FFT(n,n2,n4)/* defines fft<n>(); every use in this file passes n2 = n/2 and n4 = n/4 */\
                     +static void fft##n(FFTComplex *z)\
                     +{\
                     +    fft##n2(z);/* first half */\
                     +    fft##n4(z+n4*2);/* third quarter */\
                     +    fft##n4(z+n4*3);/* last quarter */\
                     +    pass(z,FFT_NAME(ff_cos_##n),n4/2);/* combine, using the cosine table of size n */\
                     +}
+                    +
                     +static void fft4(FFTComplex *z)
                     +{   /* Transform the four complex values z[0..3] in place. Only BF() steps
                     +     * are used (no CMUL, no table lookup). The first, second, fourth and
                     +     * fifth BF read z into temporaries; the remaining four write the
                     +     * results back into z.
                     +     */
                     +    FFTDouble t1, t2, t3, t4, t5, t6, t7, t8;
+                    +
                     +    BF(t3, t1, z[0].re, z[1].re);
                     +    BF(t8, t6, z[3].re, z[2].re);
                     +    BF(z[2].re, z[0].re, t1, t6);
                     +    BF(t4, t2, z[0].im, z[1].im);
                     +    BF(t7, t5, z[2].im, z[3].im);
                     +    BF(z[3].im, z[1].im, t4, t8);
                     +    BF(z[3].re, z[1].re, t3, t7);
                     +    BF(z[2].im, z[0].im, t2, t5);
                     +}
+                    +
                     +static void fft8(FFTComplex *z)
                     +{   /* Transform the eight complex values z[0..7] in place: fft4() on
                     +     * z[0..3], BF() steps on the pairs (z[4], z[5]) and (z[6], z[7]), then
                     +     * one BUTTERFLIES over z[0], z[2], z[4], z[6] and one TRANSFORM over
                     +     * z[1], z[3], z[5], z[7] with sqrthalf (defined elsewhere) as both
                     +     * twiddle components.
                     +     */
                     +    FFTDouble t1, t2, t3, t4, t5, t6;
+                    +
                     +    fft4(z);
+                    +
                     +    BF(t1, z[5].re, z[4].re, -z[5].re); /* these four fill t1, t2, t5, t6, which BUTTERFLIES reads below */
                     +    BF(t2, z[5].im, z[4].im, -z[5].im);
                     +    BF(t5, z[7].re, z[6].re, -z[7].re);
                     +    BF(t6, z[7].im, z[6].im, -z[7].im);
+                    +
                     +    BUTTERFLIES(z[0],z[2],z[4],z[6]);
                     +    TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf);
                     +}
+                    +
                     +#if !CONFIG_SMALL /* hand-unrolled fft16; with CONFIG_SMALL the generic DECL_FFT version below is used instead */
                     +static void fft16(FFTComplex *z)
                     +{   /* Transform z[0..15] in place: fft8() on the first half, fft4() on
                     +     * each of the last two quarters, then four combining steps written
                     +     * out explicitly instead of calling pass(). The twiddles come from
                     +     * the size-16 cosine table, which ff_fft_init() initializes whenever
                     +     * nbits >= 4, and from sqrthalf.
                     +     */
                     +    FFTDouble t1, t2, t3, t4, t5, t6;
                     +    FFTSample cos_16_1 = FFT_NAME(ff_cos_16)[1];
                     +    FFTSample cos_16_3 = FFT_NAME(ff_cos_16)[3];
+                    +
                     +    fft8(z);
                     +    fft4(z+8);
                     +    fft4(z+12);
+                    +
                     +    TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
                     +    TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf);
                     +    TRANSFORM(z[1],z[5],z[9],z[13],cos_16_1,cos_16_3);
                     +    TRANSFORM(z[3],z[7],z[11],z[15],cos_16_3,cos_16_1);
                     +}
                     +#else
                     +DECL_FFT(16,8,4)
                     +#endif
                     +DECL_FFT(32,16,8) /* fft32 .. fft512 call pass() */
                     +DECL_FFT(64,32,16)
                     +DECL_FFT(128,64,32)
                     +DECL_FFT(256,128,64)
                     +DECL_FFT(512,256,128)
                     +#if !CONFIG_SMALL
                     +#define pass pass_big /* from here on the "pass" inside DECL_FFT expands to pass_big */
                     +#endif
                     +DECL_FFT(1024,512,256) /* fft1024 .. fft65536 call pass_big() unless CONFIG_SMALL is set */
                     +DECL_FFT(2048,1024,512)
                     +DECL_FFT(4096,2048,1024)
                     +DECL_FFT(8192,4096,2048)
                     +DECL_FFT(16384,8192,4096)
                     +DECL_FFT(32768,16384,8192)
                     +DECL_FFT(65536,32768,16384)
+                    +
                     +static void (* const fft_dispatch[])(FFTComplex*) = { /* indexed by nbits - 2: entry 0 is fft4, entry 14 is fft65536 */
                     +    fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024,
                     +    fft2048, fft4096, fft8192, fft16384, fft32768, fft65536,
                     +};
+                    +
                     +static void fft_calc_c(FFTContext *s, FFTComplex *z)
                     +{   /* Run the in-place transform for the size stored in the context.
                     +     * The table index is not range-checked here; ff_fft_init() only
                     +     * accepts nbits in 2..16, which matches the 15 table entries.
                     +     */
                     +    fft_dispatch[s->nbits-2](z);
                     +}

libavcodec/mdct.c

History View file @ ac0e03b

                     deleted file mode 100644
@@ -1,203 +0,0 @@
                     -/*
                     - * MDCT/IMDCT transforms
                     - * Copyright (c) 2002 Fabrice Bellard
                     - *
                     - * This file is part of Libav.
                     - *
                     - * Libav is free software; you can redistribute it and/or
                     - * modify it under the terms of the GNU Lesser General Public
                     - * License as published by the Free Software Foundation; either
                     - * version 2.1 of the License, or (at your option) any later version.
                     - *
                     - * Libav is distributed in the hope that it will be useful,
                     - * but WITHOUT ANY WARRANTY; without even the implied warranty of
                     - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     - * Lesser General Public License for more details.
                     - *
                     - * You should have received a copy of the GNU Lesser General Public
                     - * License along with Libav; if not, write to the Free Software
                     - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                     - */
+                    -
                     -#include <stdlib.h>
                     -#include <string.h>
                     -#include "libavutil/common.h"
                     -#include "libavutil/mathematics.h"
                     -#include "fft.h"
                     -#include "fft-internal.h"
+                    -
                     -/**
                     - * @file
                     - * MDCT/IMDCT transforms.
                     - */
+                    -
                     -#if CONFIG_FFT_FLOAT
                     -#   define RSCALE(x) (x)
                     -#else
                     -#   define RSCALE(x) ((x) >> 1)
                     -#endif
+                    -
                     -/**
                     - * init MDCT or IMDCT computation.
                     - */
                     -av_cold int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale)
                     -{
                     -    int n, n4, i;
                     -    double alpha, theta;
                     -    int tstep;
+                    -
                     -    memset(s, 0, sizeof(*s));
                     -    n = 1 << nbits;
                     -    s->mdct_bits = nbits;
                     -    s->mdct_size = n;
                     -    n4 = n >> 2;
                     -    s->mdct_permutation = FF_MDCT_PERM_NONE;
+                    -
                     -    if (ff_fft_init(s, s->mdct_bits - 2, inverse) < 0)
                     -        goto fail;
+                    -
                     -    s->tcos = av_malloc(n/2 * sizeof(FFTSample));
                     -    if (!s->tcos)
                     -        goto fail;
+                    -
                     -    switch (s->mdct_permutation) {
                     -    case FF_MDCT_PERM_NONE:
                     -        s->tsin = s->tcos + n4;
                     -        tstep = 1;
                     -        break;
                     -    case FF_MDCT_PERM_INTERLEAVE:
                     -        s->tsin = s->tcos + 1;
                     -        tstep = 2;
                     -        break;
                     -    default:
                     -        goto fail;
                     -    }
+                    -
                     -    theta = 1.0 / 8.0 + (scale < 0 ? n4 : 0);
                     -    scale = sqrt(fabs(scale));
                     -    for(i=0;i<n4;i++) {
                     -        alpha = 2 * M_PI * (i + theta) / n;
                     -        s->tcos[i*tstep] = FIX15(-cos(alpha) * scale);
                     -        s->tsin[i*tstep] = FIX15(-sin(alpha) * scale);
                     -    }
                     -    return 0;
                     - fail:
                     -    ff_mdct_end(s);
                     -    return -1;
                     -}
+                    -
                     -/**
                     - * Compute the middle half of the inverse MDCT of size N = 2^nbits,
                     - * thus excluding the parts that can be derived by symmetry
                     - * @param output N/2 samples
                     - * @param input N/2 samples
                     - */
                     -void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input)
                     -{
                     -    int k, n8, n4, n2, n, j;
                     -    const uint16_t *revtab = s->revtab;
                     -    const FFTSample *tcos = s->tcos;
                     -    const FFTSample *tsin = s->tsin;
                     -    const FFTSample *in1, *in2;
                     -    FFTComplex *z = (FFTComplex *)output;
+                    -
                     -    n = 1 << s->mdct_bits;
                     -    n2 = n >> 1;
                     -    n4 = n >> 2;
                     -    n8 = n >> 3;
+                    -
                     -    /* pre rotation */
                     -    in1 = input;
                     -    in2 = input + n2 - 1;
                     -    for(k = 0; k < n4; k++) {
                     -        j=revtab[k];
                     -        CMUL(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]);
                     -        in1 += 2;
                     -        in2 -= 2;
                     -    }
                     -    s->fft_calc(s, z);
+                    -
                     -    /* post rotation + reordering */
                     -    for(k = 0; k < n8; k++) {
                     -        FFTSample r0, i0, r1, i1;
                     -        CMUL(r0, i1, z[n8-k-1].im, z[n8-k-1].re, tsin[n8-k-1], tcos[n8-k-1]);
                     -        CMUL(r1, i0, z[n8+k  ].im, z[n8+k  ].re, tsin[n8+k  ], tcos[n8+k  ]);
                     -        z[n8-k-1].re = r0;
                     -        z[n8-k-1].im = i0;
                     -        z[n8+k  ].re = r1;
                     -        z[n8+k  ].im = i1;
                     -    }
                     -}
+                    -
                     -/**
                     - * Compute inverse MDCT of size N = 2^nbits
                     - * @param output N samples
                     - * @param input N/2 samples
                     - */
                     -void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input)
                     -{
                     -    int k;
                     -    int n = 1 << s->mdct_bits;
                     -    int n2 = n >> 1;
                     -    int n4 = n >> 2;
+                    -
                     -    ff_imdct_half_c(s, output+n4, input);
+                    -
                     -    for(k = 0; k < n4; k++) {
                     -        output[k] = -output[n2-k-1];
                     -        output[n-k-1] = output[n2+k];
                     -    }
                     -}
+                    -
                     -/**
                     - * Compute MDCT of size N = 2^nbits
                     - * @param input N samples
                     - * @param out N/2 samples
                     - */
                     -void ff_mdct_calc_c(FFTContext *s, FFTSample *out, const FFTSample *input)
                     -{
                     -    int i, j, n, n8, n4, n2, n3;
                     -    FFTDouble re, im;
                     -    const uint16_t *revtab = s->revtab;
                     -    const FFTSample *tcos = s->tcos;
                     -    const FFTSample *tsin = s->tsin;
                     -    FFTComplex *x = (FFTComplex *)out;
+                    -
                     -    n = 1 << s->mdct_bits;
                     -    n2 = n >> 1;
                     -    n4 = n >> 2;
                     -    n8 = n >> 3;
                     -    n3 = 3 * n4;
+                    -
                     -    /* pre rotation */
                     -    for(i=0;i<n8;i++) {
                     -        re = RSCALE(-input[2*i+n3] - input[n3-1-2*i]);
                     -        im = RSCALE(-input[n4+2*i] + input[n4-1-2*i]);
                     -        j = revtab[i];
                     -        CMUL(x[j].re, x[j].im, re, im, -tcos[i], tsin[i]);
+                    -
                     -        re = RSCALE( input[2*i]    - input[n2-1-2*i]);
                     -        im = RSCALE(-input[n2+2*i] - input[ n-1-2*i]);
                     -        j = revtab[n8 + i];
                     -        CMUL(x[j].re, x[j].im, re, im, -tcos[n8 + i], tsin[n8 + i]);
                     -    }
+                    -
                     -    s->fft_calc(s, x);
+                    -
                     -    /* post rotation */
                     -    for(i=0;i<n8;i++) {
                     -        FFTSample r0, i0, r1, i1;
                     -        CMUL(i1, r0, x[n8-i-1].re, x[n8-i-1].im, -tsin[n8-i-1], -tcos[n8-i-1]);
                     -        CMUL(i0, r1, x[n8+i  ].re, x[n8+i  ].im, -tsin[n8+i  ], -tcos[n8+i  ]);
                     -        x[n8-i-1].re = r0;
                     -        x[n8-i-1].im = i0;
                     -        x[n8+i  ].re = r1;
                     -        x[n8+i  ].im = i1;
                     -    }
                     -}
+                    -
                     -av_cold void ff_mdct_end(FFTContext *s)
                     -{
                     -    av_freep(&s->tcos);
                     -    ff_fft_end(s);
                     -}

libavcodec/mdct_fixed.c

History View file @ ac0e03b

@@ -17,7 +17,7 @@
                       */
                      #define CONFIG_FFT_FLOAT 0
                     -#include "mdct.c"
                     +#include "mdct_template.c"
                      /* same as ff_mdct_calc_c with double-width unscaled output */
                      void ff_mdct_calcw_c(FFTContext *s, FFTDouble *out, const FFTSample *input)

libavcodec/mdct_float.c

History View file @ ac0e03b

@@ -17,4 +17,4 @@
                       */
                      #define CONFIG_FFT_FLOAT 1
                     -#include "mdct.c"
                     +#include "mdct_template.c"

libavcodec/mdct_template.c

History View file @ ac0e03b

                     new file mode 100644
@@ -0,0 +1,203 @@
                     +/*
                     + * MDCT/IMDCT transforms
                     + * Copyright (c) 2002 Fabrice Bellard
                     + *
                     + * This file is part of Libav.
                     + *
                     + * Libav is free software; you can redistribute it and/or
                     + * modify it under the terms of the GNU Lesser General Public
                     + * License as published by the Free Software Foundation; either
                     + * version 2.1 of the License, or (at your option) any later version.
                     + *
                     + * Libav is distributed in the hope that it will be useful,
                     + * but WITHOUT ANY WARRANTY; without even the implied warranty of
                     + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     + * Lesser General Public License for more details.
                     + *
                     + * You should have received a copy of the GNU Lesser General Public
                     + * License along with Libav; if not, write to the Free Software
                     + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                     + */
+                    +
                     +#include <stdlib.h>
                     +#include <string.h>
                     +#include "libavutil/common.h"
                     +#include "libavutil/mathematics.h"
                     +#include "fft.h"
                     +#include "fft-internal.h"
+                    +
                     +/**
                     + * @file
                     + * MDCT/IMDCT transforms.
                     + */
+                    +
                     +#if CONFIG_FFT_FLOAT
                     +#   define RSCALE(x) (x) /* float build: the value is used unchanged */
                     +#else
                     +#   define RSCALE(x) ((x) >> 1) /* non-float build: the value is shifted right by one bit */
                     +#endif
+                    +
                     +/**
                     + * Initialize an FFTContext for MDCT or IMDCT computation.
                     + *
                     + * Zeroes *s, stores nbits and the size n = 1 << nbits in it, calls
                     + * ff_fft_init() with nbits - 2 and allocates one table of n/2 FFTSample
                     + * entries that both s->tcos and s->tsin point into. The first n/4 cosine
                     + * and n/4 sine entries are then filled in.
                     + *
                     + * @param s       context to set up; its previous contents are overwritten
                     + * @param nbits   log2 of the transform size
                     + * @param inverse passed through unchanged to ff_fft_init()
                     + * @param scale   table values are multiplied by sqrt(fabs(scale)); a negative
                     + *                value additionally adds n/4 to the angle offset theta
                     + * @return 0 on success, -1 on failure (ff_mdct_end(s) is called first)
                     + */
                     +av_cold int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale)
                     +{
                     +    int n, n4, i;
                     +    double alpha, theta;
                     +    int tstep;
+                    +
                     +    memset(s, 0, sizeof(*s));
                     +    n = 1 << nbits;
                     +    s->mdct_bits = nbits;
                     +    s->mdct_size = n;
                     +    n4 = n >> 2;
                     +    s->mdct_permutation = FF_MDCT_PERM_NONE; /* NOTE(review): presumably ff_fft_init() below may override this, hence the switch further down - confirm */
+                    +
                     +    if (ff_fft_init(s, s->mdct_bits - 2, inverse) < 0)
                     +        goto fail;
+                    +
                     +    s->tcos = av_malloc(n/2 * sizeof(FFTSample)); /* one buffer holds both the cosine and the sine values */
                     +    if (!s->tcos)
                     +        goto fail;
+                    +
                     +    switch (s->mdct_permutation) {
                     +    case FF_MDCT_PERM_NONE:
                     +        s->tsin = s->tcos + n4; /* sines start n4 entries after the cosines, stride 1 */
                     +        tstep = 1;
                     +        break;
                     +    case FF_MDCT_PERM_INTERLEAVE:
                     +        s->tsin = s->tcos + 1; /* cosines and sines alternate, stride 2 */
                     +        tstep = 2;
                     +        break;
                     +    default:
                     +        goto fail;
                     +    }
+                    +
                     +    theta = 1.0 / 8.0 + (scale < 0 ? n4 : 0); /* the sign of scale only selects the extra n4 offset */
                     +    scale = sqrt(fabs(scale)); /* from here on scale is the non-negative table multiplier */
                     +    for(i=0;i<n4;i++) {
                     +        alpha = 2 * M_PI * (i + theta) / n;
                     +        s->tcos[i*tstep] = FIX15(-cos(alpha) * scale); /* FIX15 is defined outside this view */
                     +        s->tsin[i*tstep] = FIX15(-sin(alpha) * scale);
                     +    }
                     +    return 0;
                     + fail:
                     +    ff_mdct_end(s); /* frees s->tcos and calls ff_fft_end() */
                     +    return -1;
                     +}
+                    +
                     +/**
                     + * Compute the middle half of the inverse MDCT of size N = 2^nbits,
                     + * thus excluding the parts that can be derived by symmetry.
                     + *
                     + * The output buffer is also used as the complex work array: the
                     + * pre-rotated input is written into it, s->fft_calc() is run on it, and
                     + * the post rotation rewrites it.
                     + *
                     + * @param s      context; s->mdct_bits, s->revtab, s->tcos and s->tsin are
                     + *               read and s->fft_calc is called
                     + * @param output N/2 samples
                     + * @param input  N/2 samples
                     + */
                     +void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input)
                     +{
                     +    int k, n8, n4, n2, n, j;
                     +    const uint16_t *revtab = s->revtab;
                     +    const FFTSample *tcos = s->tcos;
                     +    const FFTSample *tsin = s->tsin;
                     +    const FFTSample *in1, *in2;
                     +    FFTComplex *z = (FFTComplex *)output; /* output viewed as an array of complex values */
+                    +
                     +    n = 1 << s->mdct_bits;
                     +    n2 = n >> 1;
                     +    n4 = n >> 2;
                     +    n8 = n >> 3;
+                    +
                     +    /* pre rotation: in1 walks forward from input[0] and in2 backward from
                     +     * input[n2-1], both two samples per step; results go to z[revtab[k]] */
                     +    in1 = input;
                     +    in2 = input + n2 - 1;
                     +    for(k = 0; k < n4; k++) {
                     +        j=revtab[k];
                     +        CMUL(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]); /* CMUL is defined outside this view; its first two arguments receive the result */
                     +        in1 += 2;
                     +        in2 -= 2;
                     +    }
                     +    s->fft_calc(s, z);
+                    +
                     +    /* post rotation + reordering: elements n8-k-1 and n8+k are handled as a
                     +     * pair, and the second CMUL output of each is stored in the other one */
                     +    for(k = 0; k < n8; k++) {
                     +        FFTSample r0, i0, r1, i1;
                     +        CMUL(r0, i1, z[n8-k-1].im, z[n8-k-1].re, tsin[n8-k-1], tcos[n8-k-1]);
                     +        CMUL(r1, i0, z[n8+k  ].im, z[n8+k  ].re, tsin[n8+k  ], tcos[n8+k  ]);
                     +        z[n8-k-1].re = r0;
                     +        z[n8-k-1].im = i0;
                     +        z[n8+k  ].re = r1;
                     +        z[n8+k  ].im = i1;
                     +    }
                     +}
+                    +
                     +/**
                     + * Compute inverse MDCT of size N = 2^nbits.
                     + *
                     + * ff_imdct_half_c() is called with output + N/4 as its destination; the
                     + * first and last N/4 samples are then derived from that result: the first
                     + * quarter is the negated mirror of the second quarter, and the last
                     + * quarter is the mirror of the third quarter.
                     + *
                     + * @param s      context, passed on to ff_imdct_half_c(); s->mdct_bits is read
                     + * @param output N samples
                     + * @param input  N/2 samples
                     + */
                     +void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input)
                     +{
                     +    int k;
                     +    int n = 1 << s->mdct_bits;
                     +    int n2 = n >> 1;
                     +    int n4 = n >> 2;
+                    +
                     +    ff_imdct_half_c(s, output+n4, input); /* result starts at output[n4] */
+                    +
                     +    for(k = 0; k < n4; k++) {
                     +        output[k] = -output[n2-k-1]; /* first quarter: negated mirror of the second quarter */
                     +        output[n-k-1] = output[n2+k]; /* last quarter: mirror of the third quarter */
                     +    }
                     +}
+                    +
                     +/**
                     + * Compute MDCT of size N = 2^nbits.
                     + *
                     + * The N input samples are combined pairwise into N/4 complex values that
                     + * are written into the out buffer, s->fft_calc() is run on that buffer,
                     + * and a final rotation rewrites it.
                     + *
                     + * @param s     context; s->mdct_bits, s->revtab, s->tcos and s->tsin are
                     + *              read and s->fft_calc is called
                     + * @param input N samples
                     + * @param out   N/2 samples; also used as the complex work array
                     + */
                     +void ff_mdct_calc_c(FFTContext *s, FFTSample *out, const FFTSample *input)
                     +{
                     +    int i, j, n, n8, n4, n2, n3;
                     +    FFTDouble re, im;
                     +    const uint16_t *revtab = s->revtab;
                     +    const FFTSample *tcos = s->tcos;
                     +    const FFTSample *tsin = s->tsin;
                     +    FFTComplex *x = (FFTComplex *)out; /* out viewed as an array of complex values */
+                    +
                     +    n = 1 << s->mdct_bits;
                     +    n2 = n >> 1;
                     +    n4 = n >> 2;
                     +    n8 = n >> 3;
                     +    n3 = 3 * n4;
+                    +
                     +    /* pre rotation: each iteration produces two complex values, stored at
                     +     * x[revtab[i]] and x[revtab[n8 + i]]; RSCALE is the identity in the
                     +     * float build and a right shift by one otherwise */
                     +    for(i=0;i<n8;i++) {
                     +        re = RSCALE(-input[2*i+n3] - input[n3-1-2*i]);
                     +        im = RSCALE(-input[n4+2*i] + input[n4-1-2*i]);
                     +        j = revtab[i];
                     +        CMUL(x[j].re, x[j].im, re, im, -tcos[i], tsin[i]); /* CMUL is defined outside this view; its first two arguments receive the result */
+                    +
                     +        re = RSCALE( input[2*i]    - input[n2-1-2*i]);
                     +        im = RSCALE(-input[n2+2*i] - input[ n-1-2*i]);
                     +        j = revtab[n8 + i];
                     +        CMUL(x[j].re, x[j].im, re, im, -tcos[n8 + i], tsin[n8 + i]);
                     +    }
+                    +
                     +    s->fft_calc(s, x);
+                    +
                     +    /* post rotation: elements n8-i-1 and n8+i are handled as a pair, and
                     +     * the first CMUL output of each is stored as .im of the other one */
                     +    for(i=0;i<n8;i++) {
                     +        FFTSample r0, i0, r1, i1;
                     +        CMUL(i1, r0, x[n8-i-1].re, x[n8-i-1].im, -tsin[n8-i-1], -tcos[n8-i-1]);
                     +        CMUL(i0, r1, x[n8+i  ].re, x[n8+i  ].im, -tsin[n8+i  ], -tcos[n8+i  ]);
                     +        x[n8-i-1].re = r0;
                     +        x[n8-i-1].im = i0;
                     +        x[n8+i  ].re = r1;
                     +        x[n8+i  ].im = i1;
                     +    }
                     +}
+                    +
                     +av_cold void ff_mdct_end(FFTContext *s) /* Release what ff_mdct_init() set up in s. */
                     +{
                     +    av_freep(&s->tcos); /* s->tsin points into this same allocation (see ff_mdct_init), so it is not freed separately */
                     +    ff_fft_end(s);
                     +}