Remove the bias parameter, which every caller passed as zero, from these functions:
DSPContext.vector_fmul_window()
DCADSPContext.lfe_fir()
SynthFilterContext.synth_filter_float()
Signed-off-by: Mans Rullgard <mans@mansr.com>
| ... | ... |
@@ -1721,19 +1721,19 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce) |
| 1721 | 1721 |
*/ |
| 1722 | 1722 |
if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) && |
| 1723 | 1723 |
(ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
|
| 1724 |
- ac->dsp.vector_fmul_window( out, saved, buf, lwindow_prev, 0, 512); |
|
| 1724 |
+ ac->dsp.vector_fmul_window( out, saved, buf, lwindow_prev, 512); |
|
| 1725 | 1725 |
} else {
|
| 1726 | 1726 |
memcpy( out, saved, 448 * sizeof(float)); |
| 1727 | 1727 |
|
| 1728 | 1728 |
if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
|
| 1729 |
- ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, 0, 64); |
|
| 1730 |
- ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, 0, 64); |
|
| 1731 |
- ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, 0, 64); |
|
| 1732 |
- ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, 0, 64); |
|
| 1733 |
- ac->dsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, 0, 64); |
|
| 1729 |
+ ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, 64); |
|
| 1730 |
+ ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, 64); |
|
| 1731 |
+ ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, 64); |
|
| 1732 |
+ ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, 64); |
|
| 1733 |
+ ac->dsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, 64); |
|
| 1734 | 1734 |
memcpy( out + 448 + 4*128, temp, 64 * sizeof(float)); |
| 1735 | 1735 |
} else {
|
| 1736 |
- ac->dsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 0, 64); |
|
| 1736 |
+ ac->dsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64); |
|
| 1737 | 1737 |
memcpy( out + 576, buf + 64, 448 * sizeof(float)); |
| 1738 | 1738 |
} |
| 1739 | 1739 |
} |
| ... | ... |
@@ -1741,9 +1741,9 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce) |
| 1741 | 1741 |
// buffer update |
| 1742 | 1742 |
if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
|
| 1743 | 1743 |
memcpy( saved, temp + 64, 64 * sizeof(float)); |
| 1744 |
- ac->dsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 0, 64); |
|
| 1745 |
- ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64); |
|
| 1746 |
- ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64); |
|
| 1744 |
+ ac->dsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 64); |
|
| 1745 |
+ ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64); |
|
| 1746 |
+ ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64); |
|
| 1747 | 1747 |
memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float)); |
| 1748 | 1748 |
} else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
|
| 1749 | 1749 |
memcpy( saved, buf + 512, 448 * sizeof(float)); |
| ... | ... |
@@ -628,13 +628,13 @@ static inline void do_imdct(AC3DecodeContext *s, int channels) |
| 628 | 628 |
for(i=0; i<128; i++) |
| 629 | 629 |
x[i] = s->transform_coeffs[ch][2*i]; |
| 630 | 630 |
ff_imdct_half(&s->imdct_256, s->tmp_output, x); |
| 631 |
- s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 0, 128); |
|
| 631 |
+ s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 128); |
|
| 632 | 632 |
for(i=0; i<128; i++) |
| 633 | 633 |
x[i] = s->transform_coeffs[ch][2*i+1]; |
| 634 | 634 |
ff_imdct_half(&s->imdct_256, s->delay[ch-1], x); |
| 635 | 635 |
} else {
|
| 636 | 636 |
ff_imdct_half(&s->imdct_512, s->tmp_output, s->transform_coeffs[ch]); |
| 637 |
- s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 0, 128); |
|
| 637 |
+ s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 128); |
|
| 638 | 638 |
memcpy(s->delay[ch-1], s->tmp_output+128, 128*sizeof(float)); |
| 639 | 639 |
} |
| 640 | 640 |
} |
| ... | ... |
@@ -23,7 +23,7 @@ |
| 23 | 23 |
#include "libavcodec/dcadsp.h" |
| 24 | 24 |
|
| 25 | 25 |
void ff_dca_lfe_fir_neon(float *out, const float *in, const float *coefs, |
| 26 |
- int decifactor, float scale, float bias); |
|
| 26 |
+ int decifactor, float scale); |
|
| 27 | 27 |
|
| 28 | 28 |
void av_cold ff_dcadsp_init_arm(DCADSPContext *s) |
| 29 | 29 |
{
|
| ... | ... |
@@ -29,7 +29,7 @@ function ff_dca_lfe_fir_neon, export=1 |
| 29 | 29 |
cmp r3, #32 |
| 30 | 30 |
moveq r6, #256/32 |
| 31 | 31 |
movne r6, #256/64 |
| 32 |
-NOVFP vldr d0, [sp, #16] @ scale, bias |
|
| 32 |
+NOVFP vldr s0, [sp, #16] @ scale |
|
| 33 | 33 |
mov lr, #-16 |
| 34 | 34 |
1: |
| 35 | 35 |
vmov.f32 q2, #0.0 @ v0 |
| ... | ... |
@@ -51,8 +51,7 @@ NOVFP vldr d0, [sp, #16] @ scale, bias |
| 51 | 51 |
vadd.f32 d4, d4, d5 |
| 52 | 52 |
vadd.f32 d6, d6, d7 |
| 53 | 53 |
vpadd.f32 d4, d4, d6 |
| 54 |
- vdup.32 d5, d0[1] |
|
| 55 |
- vmla.f32 d5, d4, d0[0] |
|
| 54 |
+ vmul.f32 d5, d4, d0[0] |
|
| 56 | 55 |
vst1.32 {d5[0]}, [r0,:32]!
|
| 57 | 56 |
vst1.32 {d5[1]}, [r4,:32]!
|
| 58 | 57 |
bne 1b |
| ... | ... |
@@ -140,8 +140,7 @@ void ff_vp3_h_loop_filter_neon(uint8_t *, int, int *); |
| 140 | 140 |
|
| 141 | 141 |
void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len); |
| 142 | 142 |
void ff_vector_fmul_window_neon(float *dst, const float *src0, |
| 143 |
- const float *src1, const float *win, |
|
| 144 |
- float add_bias, int len); |
|
| 143 |
+ const float *src1, const float *win, int len); |
|
| 145 | 144 |
void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul, |
| 146 | 145 |
int len); |
| 147 | 146 |
void ff_vector_fmul_sv_scalar_2_neon(float *dst, const float *src, |
| ... | ... |
@@ -777,11 +777,8 @@ function ff_vector_fmul_neon, export=1 |
| 777 | 777 |
endfunc |
| 778 | 778 |
|
| 779 | 779 |
function ff_vector_fmul_window_neon, export=1 |
| 780 |
-VFP vdup.32 q8, d0[0] |
|
| 781 |
-NOVFP vld1.32 {d16[],d17[]}, [sp,:32]
|
|
| 782 | 780 |
push {r4,r5,lr}
|
| 783 |
-VFP ldr lr, [sp, #12] |
|
| 784 |
-NOVFP ldr lr, [sp, #16] |
|
| 781 |
+ ldr lr, [sp, #12] |
|
| 785 | 782 |
sub r2, r2, #8 |
| 786 | 783 |
sub r5, lr, #2 |
| 787 | 784 |
add r2, r2, r5, lsl #2 |
| ... | ... |
@@ -793,14 +790,12 @@ NOVFP ldr lr, [sp, #16] |
| 793 | 793 |
vld1.64 {d4,d5}, [r3,:128]!
|
| 794 | 794 |
vld1.64 {d6,d7}, [r4,:128], r5
|
| 795 | 795 |
1: subs lr, lr, #4 |
| 796 |
- vmov q11, q8 |
|
| 797 |
- vmla.f32 d22, d0, d4 |
|
| 798 |
- vmov q10, q8 |
|
| 799 |
- vmla.f32 d23, d1, d5 |
|
| 796 |
+ vmul.f32 d22, d0, d4 |
|
| 800 | 797 |
vrev64.32 q3, q3 |
| 801 |
- vmla.f32 d20, d0, d7 |
|
| 798 |
+ vmul.f32 d23, d1, d5 |
|
| 802 | 799 |
vrev64.32 q1, q1 |
| 803 |
- vmla.f32 d21, d1, d6 |
|
| 800 |
+ vmul.f32 d20, d0, d7 |
|
| 801 |
+ vmul.f32 d21, d1, d6 |
|
| 804 | 802 |
beq 2f |
| 805 | 803 |
vmla.f32 d22, d3, d7 |
| 806 | 804 |
vld1.64 {d0,d1}, [r1,:128]!
|
| ... | ... |
@@ -34,7 +34,7 @@ void ff_synth_filter_float_neon(FFTContext *imdct, |
| 34 | 34 |
float *synth_buf_ptr, int *synth_buf_offset, |
| 35 | 35 |
float synth_buf2[32], const float window[512], |
| 36 | 36 |
float out[32], const float in[32], |
| 37 |
- float scale, float bias); |
|
| 37 |
+ float scale); |
|
| 38 | 38 |
|
| 39 | 39 |
av_cold void ff_fft_init_arm(FFTContext *s) |
| 40 | 40 |
{
|
| ... | ... |
@@ -42,7 +42,7 @@ VFP vpop {d0}
|
| 42 | 42 |
|
| 43 | 43 |
ldr r5, [sp, #9*4] @ window |
| 44 | 44 |
ldr r2, [sp, #10*4] @ out |
| 45 |
-NOVFP vldr d0, [sp, #12*4] @ scale, bias |
|
| 45 |
+NOVFP vldr s0, [sp, #12*4] @ scale |
|
| 46 | 46 |
add r8, r9, #12*4 |
| 47 | 47 |
|
| 48 | 48 |
mov lr, #64*4 |
| ... | ... |
@@ -90,10 +90,8 @@ NOVFP vldr d0, [sp, #12*4] @ scale, bias |
| 90 | 90 |
sub r11, r11, #512*4 |
| 91 | 91 |
b 2b |
| 92 | 92 |
3: |
| 93 |
- vdup.32 q8, d0[1] |
|
| 94 |
- vdup.32 q9, d0[1] |
|
| 95 |
- vmla.f32 q8, q10, d0[0] |
|
| 96 |
- vmla.f32 q9, q1, d0[0] |
|
| 93 |
+ vmul.f32 q8, q10, d0[0] |
|
| 94 |
+ vmul.f32 q9, q1, d0[0] |
|
| 97 | 95 |
vst1.32 {q3}, [r3,:128]
|
| 98 | 96 |
sub r3, r3, #16*4 |
| 99 | 97 |
vst1.32 {q2}, [r3,:128]
|
| ... | ... |
@@ -141,7 +141,7 @@ static int at1_imdct_block(AT1SUCtx* su, AT1Ctx *q) |
| 141 | 141 |
|
| 142 | 142 |
/* overlap and window */ |
| 143 | 143 |
q->dsp.vector_fmul_window(&q->bands[band_num][start_pos], prev_buf, |
| 144 |
- &su->spectrum[0][ref_pos + start_pos], ff_sine_32, 0, 16); |
|
| 144 |
+ &su->spectrum[0][ref_pos + start_pos], ff_sine_32, 16); |
|
| 145 | 145 |
|
| 146 | 146 |
prev_buf = &su->spectrum[0][ref_pos+start_pos + 16]; |
| 147 | 147 |
start_pos += block_size; |
| ... | ... |
@@ -896,7 +896,7 @@ static void qmf_32_subbands(DCAContext * s, int chans, |
| 896 | 896 |
s->synth.synth_filter_float(&s->imdct, |
| 897 | 897 |
s->subband_fir_hist[chans], &s->hist_index[chans], |
| 898 | 898 |
s->subband_fir_noidea[chans], prCoeff, |
| 899 |
- samples_out, s->raXin, scale, 0); |
|
| 899 |
+ samples_out, s->raXin, scale); |
|
| 900 | 900 |
samples_out+= 32; |
| 901 | 901 |
|
| 902 | 902 |
} |
| ... | ... |
@@ -929,7 +929,7 @@ static void lfe_interpolation_fir(DCAContext *s, int decimation_select, |
| 929 | 929 |
/* Interpolation */ |
| 930 | 930 |
for (deciindex = 0; deciindex < num_deci_sample; deciindex++) {
|
| 931 | 931 |
s->dcadsp.lfe_fir(samples_out, samples_in, prCoeff, decifactor, |
| 932 |
- scale, 0); |
|
| 932 |
+ scale); |
|
| 933 | 933 |
samples_in++; |
| 934 | 934 |
samples_out += 2 * decifactor; |
| 935 | 935 |
} |
| ... | ... |
@@ -23,7 +23,7 @@ |
| 23 | 23 |
#include "dcadsp.h" |
| 24 | 24 |
|
| 25 | 25 |
static void dca_lfe_fir_c(float *out, const float *in, const float *coefs, |
| 26 |
- int decifactor, float scale, float bias) |
|
| 26 |
+ int decifactor, float scale) |
|
| 27 | 27 |
{
|
| 28 | 28 |
float *out2 = out + decifactor; |
| 29 | 29 |
const float *cf0 = coefs; |
| ... | ... |
@@ -39,8 +39,8 @@ static void dca_lfe_fir_c(float *out, const float *in, const float *coefs, |
| 39 | 39 |
v0 += s * *cf0++; |
| 40 | 40 |
v1 += s * *--cf1; |
| 41 | 41 |
} |
| 42 |
- *out++ = (v0 * scale) + bias; |
|
| 43 |
- *out2++ = (v1 * scale) + bias; |
|
| 42 |
+ *out++ = v0 * scale; |
|
| 43 |
+ *out2++ = v1 * scale; |
|
| 44 | 44 |
} |
| 45 | 45 |
} |
| 46 | 46 |
|
| ... | ... |
@@ -3776,7 +3776,9 @@ static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, |
| 3776 | 3776 |
dst[i] = src0[i] * src1[i] + src2[i]; |
| 3777 | 3777 |
} |
| 3778 | 3778 |
|
| 3779 |
-void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len){
|
|
| 3779 |
+static void vector_fmul_window_c(float *dst, const float *src0, |
|
| 3780 |
+ const float *src1, const float *win, int len) |
|
| 3781 |
+{
|
|
| 3780 | 3782 |
int i,j; |
| 3781 | 3783 |
dst += len; |
| 3782 | 3784 |
win += len; |
| ... | ... |
@@ -3786,8 +3788,8 @@ void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, c |
| 3786 | 3786 |
float s1 = src1[j]; |
| 3787 | 3787 |
float wi = win[i]; |
| 3788 | 3788 |
float wj = win[j]; |
| 3789 |
- dst[i] = s0*wj - s1*wi + add_bias; |
|
| 3790 |
- dst[j] = s0*wi + s1*wj + add_bias; |
|
| 3789 |
+ dst[i] = s0*wj - s1*wi; |
|
| 3790 |
+ dst[j] = s0*wi + s1*wj; |
|
| 3791 | 3791 |
} |
| 3792 | 3792 |
} |
| 3793 | 3793 |
|
| ... | ... |
@@ -4434,7 +4436,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) |
| 4434 | 4434 |
c->vector_fmul = vector_fmul_c; |
| 4435 | 4435 |
c->vector_fmul_reverse = vector_fmul_reverse_c; |
| 4436 | 4436 |
c->vector_fmul_add = vector_fmul_add_c; |
| 4437 |
- c->vector_fmul_window = ff_vector_fmul_window_c; |
|
| 4437 |
+ c->vector_fmul_window = vector_fmul_window_c; |
|
| 4438 | 4438 |
c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c; |
| 4439 | 4439 |
c->vector_clipf = vector_clipf_c; |
| 4440 | 4440 |
c->float_to_int16 = ff_float_to_int16_c; |
| ... | ... |
@@ -68,9 +68,6 @@ void ff_h264_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qmul); |
| 68 | 68 |
void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp); |
| 69 | 69 |
void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc); |
| 70 | 70 |
|
| 71 |
-void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, |
|
| 72 |
- const float *win, float add_bias, int len); |
|
| 73 |
- |
|
| 74 | 71 |
/* encoding scans */ |
| 75 | 72 |
extern const uint8_t ff_alternate_horizontal_scan[64]; |
| 76 | 73 |
extern const uint8_t ff_alternate_vertical_scan[64]; |
| ... | ... |
@@ -393,7 +390,7 @@ typedef struct DSPContext {
|
| 393 | 393 |
/* assume len is a multiple of 8, and src arrays are 16-byte aligned */ |
| 394 | 394 |
void (*vector_fmul_add)(float *dst, const float *src0, const float *src1, const float *src2, int len); |
| 395 | 395 |
/* assume len is a multiple of 4, and arrays are 16-byte aligned */ |
| 396 |
- void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len); |
|
| 396 |
+ void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, int len); |
|
| 397 | 397 |
/* assume len is a multiple of 8, and arrays are 16-byte aligned */ |
| 398 | 398 |
void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len); |
| 399 | 399 |
void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */); |
| ... | ... |
@@ -90,13 +90,9 @@ static void vector_fmul_add_altivec(float *dst, const float *src0, |
| 90 | 90 |
} |
| 91 | 91 |
} |
| 92 | 92 |
|
| 93 |
-static void vector_fmul_window_altivec(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len) |
|
| 93 |
+static void vector_fmul_window_altivec(float *dst, const float *src0, const float *src1, const float *win, int len) |
|
| 94 | 94 |
{
|
| 95 |
- union {
|
|
| 96 |
- vector float v; |
|
| 97 |
- float s[4]; |
|
| 98 |
- } vadd; |
|
| 99 |
- vector float vadd_bias, zero, t0, t1, s0, s1, wi, wj; |
|
| 95 |
+ vector float zero, t0, t1, s0, s1, wi, wj; |
|
| 100 | 96 |
const vector unsigned char reverse = vcprm(3,2,1,0); |
| 101 | 97 |
int i,j; |
| 102 | 98 |
|
| ... | ... |
@@ -104,8 +100,6 @@ static void vector_fmul_window_altivec(float *dst, const float *src0, const floa |
| 104 | 104 |
win += len; |
| 105 | 105 |
src0+= len; |
| 106 | 106 |
|
| 107 |
- vadd.s[0] = add_bias; |
|
| 108 |
- vadd_bias = vec_splat(vadd.v, 0); |
|
| 109 | 107 |
zero = (vector float)vec_splat_u32(0); |
| 110 | 108 |
|
| 111 | 109 |
for(i=-len*4, j=len*4-16; i<0; i+=16, j-=16) {
|
| ... | ... |
@@ -117,9 +111,9 @@ static void vector_fmul_window_altivec(float *dst, const float *src0, const floa |
| 117 | 117 |
s1 = vec_perm(s1, s1, reverse); |
| 118 | 118 |
wj = vec_perm(wj, wj, reverse); |
| 119 | 119 |
|
| 120 |
- t0 = vec_madd(s0, wj, vadd_bias); |
|
| 120 |
+ t0 = vec_madd(s0, wj, zero); |
|
| 121 | 121 |
t0 = vec_nmsub(s1, wi, t0); |
| 122 |
- t1 = vec_madd(s0, wi, vadd_bias); |
|
| 122 |
+ t1 = vec_madd(s0, wi, zero); |
|
| 123 | 123 |
t1 = vec_madd(s1, wj, t1); |
| 124 | 124 |
t1 = vec_perm(t1, t1, reverse); |
| 125 | 125 |
|
| ... | ... |
@@ -24,7 +24,7 @@ |
| 24 | 24 |
static void synth_filter_float(FFTContext *imdct, |
| 25 | 25 |
float *synth_buf_ptr, int *synth_buf_offset, |
| 26 | 26 |
float synth_buf2[32], const float window[512], |
| 27 |
- float out[32], const float in[32], float scale, float bias) |
|
| 27 |
+ float out[32], const float in[32], float scale) |
|
| 28 | 28 |
{
|
| 29 | 29 |
float *synth_buf= synth_buf_ptr + *synth_buf_offset; |
| 30 | 30 |
int i, j; |
| ... | ... |
@@ -48,8 +48,8 @@ static void synth_filter_float(FFTContext *imdct, |
| 48 | 48 |
c += window[i + j + 32]*( synth_buf[16 + i + j - 512]); |
| 49 | 49 |
d += window[i + j + 48]*( synth_buf[31 - i + j - 512]); |
| 50 | 50 |
} |
| 51 |
- out[i ] = a*scale + bias; |
|
| 52 |
- out[i + 16] = b*scale + bias; |
|
| 51 |
+ out[i ] = a*scale; |
|
| 52 |
+ out[i + 16] = b*scale; |
|
| 53 | 53 |
synth_buf2[i ] = c; |
| 54 | 54 |
synth_buf2[i + 16] = d; |
| 55 | 55 |
} |
| ... | ... |
@@ -28,7 +28,7 @@ typedef struct SynthFilterContext {
|
| 28 | 28 |
float *synth_buf_ptr, int *synth_buf_offset, |
| 29 | 29 |
float synth_buf2[32], const float window[512], |
| 30 | 30 |
float out[32], const float in[32], |
| 31 |
- float scale, float bias); |
|
| 31 |
+ float scale); |
|
| 32 | 32 |
} SynthFilterContext; |
| 33 | 33 |
|
| 34 | 34 |
void ff_synth_filter_init(SynthFilterContext *c); |
| ... | ... |
@@ -1575,13 +1575,13 @@ static int vorbis_parse_audio_packet(vorbis_context *vc) |
| 1575 | 1575 |
const float *win = vc->win[blockflag & previous_window]; |
| 1576 | 1576 |
|
| 1577 | 1577 |
if (blockflag == previous_window) {
|
| 1578 |
- vc->dsp.vector_fmul_window(ret, saved, buf, win, 0, blocksize / 4); |
|
| 1578 |
+ vc->dsp.vector_fmul_window(ret, saved, buf, win, blocksize / 4); |
|
| 1579 | 1579 |
} else if (blockflag > previous_window) {
|
| 1580 |
- vc->dsp.vector_fmul_window(ret, saved, buf, win, 0, bs0 / 4); |
|
| 1580 |
+ vc->dsp.vector_fmul_window(ret, saved, buf, win, bs0 / 4); |
|
| 1581 | 1581 |
memcpy(ret+bs0/2, buf+bs0/4, ((bs1-bs0)/4) * sizeof(float)); |
| 1582 | 1582 |
} else {
|
| 1583 | 1583 |
memcpy(ret, saved, ((bs1 - bs0) / 4) * sizeof(float)); |
| 1584 |
- vc->dsp.vector_fmul_window(ret + (bs1 - bs0) / 4, saved + (bs1 - bs0) / 4, buf, win, 0, bs0 / 4); |
|
| 1584 |
+ vc->dsp.vector_fmul_window(ret + (bs1 - bs0) / 4, saved + (bs1 - bs0) / 4, buf, win, bs0 / 4); |
|
| 1585 | 1585 |
} |
| 1586 | 1586 |
memcpy(saved, buf + blocksize / 4, blocksize / 4 * sizeof(float)); |
| 1587 | 1587 |
} |
| ... | ... |
@@ -1031,7 +1031,7 @@ static void wmapro_window(WMAProDecodeCtx *s) |
| 1031 | 1031 |
winlen >>= 1; |
| 1032 | 1032 |
|
| 1033 | 1033 |
s->dsp.vector_fmul_window(start, start, start + winlen, |
| 1034 |
- window, 0, winlen); |
|
| 1034 |
+ window, winlen); |
|
| 1035 | 1035 |
|
| 1036 | 1036 |
s->channel[c].prev_block_len = s->subframe_len; |
| 1037 | 1037 |
} |
| ... | ... |
@@ -2190,10 +2190,9 @@ static void vector_fmul_add_sse(float *dst, const float *src0, const float *src1 |
| 2190 | 2190 |
); |
| 2191 | 2191 |
} |
| 2192 | 2192 |
|
| 2193 |
-static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float *src1, |
|
| 2194 |
- const float *win, float add_bias, int len){
|
|
| 2195 | 2193 |
#if HAVE_6REGS |
| 2196 |
- if(add_bias == 0){
|
|
| 2194 |
+static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float *src1, |
|
| 2195 |
+ const float *win, int len){
|
|
| 2197 | 2196 |
x86_reg i = -len*4; |
| 2198 | 2197 |
x86_reg j = len*4-8; |
| 2199 | 2198 |
__asm__ volatile( |
| ... | ... |
@@ -2220,15 +2219,10 @@ static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float |
| 2220 | 2220 |
:"+r"(i), "+r"(j) |
| 2221 | 2221 |
:"r"(dst+len), "r"(src0+len), "r"(src1), "r"(win+len) |
| 2222 | 2222 |
); |
| 2223 |
- }else |
|
| 2224 |
-#endif |
|
| 2225 |
- ff_vector_fmul_window_c(dst, src0, src1, win, add_bias, len); |
|
| 2226 | 2223 |
} |
| 2227 | 2224 |
|
| 2228 | 2225 |
static void vector_fmul_window_sse(float *dst, const float *src0, const float *src1, |
| 2229 |
- const float *win, float add_bias, int len){
|
|
| 2230 |
-#if HAVE_6REGS |
|
| 2231 |
- if(add_bias == 0){
|
|
| 2226 |
+ const float *win, int len){
|
|
| 2232 | 2227 |
x86_reg i = -len*4; |
| 2233 | 2228 |
x86_reg j = len*4-16; |
| 2234 | 2229 |
__asm__ volatile( |
| ... | ... |
@@ -2256,10 +2250,8 @@ static void vector_fmul_window_sse(float *dst, const float *src0, const float *s |
| 2256 | 2256 |
:"+r"(i), "+r"(j) |
| 2257 | 2257 |
:"r"(dst+len), "r"(src0+len), "r"(src1), "r"(win+len) |
| 2258 | 2258 |
); |
| 2259 |
- }else |
|
| 2260 |
-#endif |
|
| 2261 |
- ff_vector_fmul_window_c(dst, src0, src1, win, add_bias, len); |
|
| 2262 | 2259 |
} |
| 2260 |
+#endif /* HAVE_6REGS */ |
|
| 2263 | 2261 |
|
| 2264 | 2262 |
static void int32_to_float_fmul_scalar_sse(float *dst, const int *src, float mul, int len) |
| 2265 | 2263 |
{
|
| ... | ... |
@@ -2882,7 +2874,9 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) |
| 2882 | 2882 |
} |
| 2883 | 2883 |
if(mm_flags & AV_CPU_FLAG_3DNOWEXT){
|
| 2884 | 2884 |
c->vector_fmul_reverse = vector_fmul_reverse_3dnow2; |
| 2885 |
+#if HAVE_6REGS |
|
| 2885 | 2886 |
c->vector_fmul_window = vector_fmul_window_3dnow2; |
| 2887 |
+#endif |
|
| 2886 | 2888 |
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
|
| 2887 | 2889 |
c->float_to_int16_interleave = float_to_int16_interleave_3dn2; |
| 2888 | 2890 |
} |
| ... | ... |
@@ -2899,7 +2893,9 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) |
| 2899 | 2899 |
c->vector_fmul = vector_fmul_sse; |
| 2900 | 2900 |
c->vector_fmul_reverse = vector_fmul_reverse_sse; |
| 2901 | 2901 |
c->vector_fmul_add = vector_fmul_add_sse; |
| 2902 |
+#if HAVE_6REGS |
|
| 2902 | 2903 |
c->vector_fmul_window = vector_fmul_window_sse; |
| 2904 |
+#endif |
|
| 2903 | 2905 |
c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse; |
| 2904 | 2906 |
c->vector_clipf = vector_clipf_sse; |
| 2905 | 2907 |
c->float_to_int16 = float_to_int16_sse; |