Originally committed as revision 19846 to svn://svn.ffmpeg.org/ffmpeg/trunk
| ... | ... |
@@ -327,8 +327,10 @@ function ff_fft_permute_neon, export=1
 1:
         vld1.32 {d0-d1}, [r1,:128]!
         ldr r4, [r0], #4
-        uxtah lr, r3, r4
-        uxtah r4, r3, r4, ror #16
+        uxth lr, r4
+        uxth r4, r4, ror #16
+        add lr, r3, lr, lsl #3
+        add r4, r3, r4, lsl #3
         vst1.32 {d0}, [lr,:64]
         vst1.32 {d1}, [r4,:64]
         subs r12, r12, #2
| ... | ... |
@@ -52,8 +52,10 @@ function ff_imdct_half_neon, export=1
         vmul.f32 d5, d17, d3
         vsub.f32 d4, d6, d4
         vadd.f32 d5, d5, d7
-        uxtah r8, r1, r6, ror #16
-        uxtah r6, r1, r6
+        uxth r8, r6, ror #16
+        uxth r6, r6
+        add r8, r1, r8, lsl #3
+        add r6, r1, r6, lsl #3
         beq 1f
         vld2.32 {d16-d17},[r7,:128],r12
         vld2.32 {d0-d1}, [r2,:128]!
| ... | ... |
@@ -198,8 +200,10 @@ function ff_mdct_calc_neon, export=1
         subs lr, lr, #16
         vsub.f32 d6, d6, d7 @ -R*c-I*s
         vadd.f32 d7, d4, d5 @ -R*s+I*c
-        uxtah r10, r1, r6, ror #16
-        uxtah r6, r1, r6
+        uxth r10, r6, ror #16
+        uxth r6, r6
+        add r10, r1, r10, lsl #3
+        add r6, r1, r6, lsl #3
         beq 1f
         vld2.32 {d16,d18},[r9,:128],r12 @ x,x in4d1,in4d0
         vld2.32 {d17,d19},[r8,:128],r12 @ x,x in3d1,in3d0
| ... | ... |
@@ -245,8 +249,10 @@ function ff_mdct_calc_neon, export=1
         subs lr, lr, #16
         vsub.f32 d6, d7, d6 @ I*s-R*c
         vadd.f32 d7, d4, d5 @ R*s-I*c
-        uxtah r10, r1, r6, ror #16
-        uxtah r6, r1, r6
+        uxth r10, r6, ror #16
+        uxth r6, r6
+        add r10, r1, r10, lsl #3
+        add r6, r1, r6, lsl #3
         beq 1f
         vld2.32 {d16,d18},[r9,:128],r12 @ x,x in2d1,in2d0
         vld2.32 {d17,d19},[r8,:128],r12 @ x,x in1d1,in1d0
| ... | ... |
@@ -64,7 +64,6 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
     float alpha, c1, s1, s2;
     int split_radix = 1;
     int av_unused has_vectors;
-    int revtab_shift = 0;
 
     if (nbits < 2 || nbits > 16)
         goto fail;
| ... | ... |
@@ -120,7 +119,6 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
     s->imdct_calc = ff_imdct_calc_neon;
     s->imdct_half = ff_imdct_half_neon;
     s->mdct_calc = ff_mdct_calc_neon;
-    revtab_shift = 3;
 #endif
 
     if (split_radix) {
| ... | ... |
@@ -134,8 +132,7 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
                 tab[m/2-i] = tab[i];
         }
         for(i=0; i<n; i++)
-            s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] =
-                i << revtab_shift;
+            s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = i;
         s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
     } else {
         int np, nblocks, np2, l;