Browse code

Remove the unneeded additive bias parameter from 3 functions:

DSPContext.vector_fmul_window()
DCADSPContext.lfe_fir()
SynthFilterContext.synth_filter_float()

Signed-off-by: Mans Rullgard <mans@mansr.com>

Justin Ruggles authored on 2011/02/01 04:26:02
Showing 21 changed files
... ...
@@ -1721,19 +1721,19 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
1721 1721
      */
1722 1722
     if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
1723 1723
             (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
1724
-        ac->dsp.vector_fmul_window(    out,               saved,            buf,         lwindow_prev, 0, 512);
1724
+        ac->dsp.vector_fmul_window(    out,               saved,            buf,         lwindow_prev, 512);
1725 1725
     } else {
1726 1726
         memcpy(                        out,               saved,            448 * sizeof(float));
1727 1727
 
1728 1728
         if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1729
-            ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448,      buf + 0*128, swindow_prev, 0, 64);
1730
-            ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow,      0, 64);
1731
-            ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow,      0, 64);
1732
-            ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow,      0, 64);
1733
-            ac->dsp.vector_fmul_window(temp,              buf + 3*128 + 64, buf + 4*128, swindow,      0, 64);
1729
+            ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448,      buf + 0*128, swindow_prev, 64);
1730
+            ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow,      64);
1731
+            ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow,      64);
1732
+            ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow,      64);
1733
+            ac->dsp.vector_fmul_window(temp,              buf + 3*128 + 64, buf + 4*128, swindow,      64);
1734 1734
             memcpy(                    out + 448 + 4*128, temp, 64 * sizeof(float));
1735 1735
         } else {
1736
-            ac->dsp.vector_fmul_window(out + 448,         saved + 448,      buf,         swindow_prev, 0, 64);
1736
+            ac->dsp.vector_fmul_window(out + 448,         saved + 448,      buf,         swindow_prev, 64);
1737 1737
             memcpy(                    out + 576,         buf + 64,         448 * sizeof(float));
1738 1738
         }
1739 1739
     }
... ...
@@ -1741,9 +1741,9 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
1741 1741
     // buffer update
1742 1742
     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1743 1743
         memcpy(                    saved,       temp + 64,         64 * sizeof(float));
1744
-        ac->dsp.vector_fmul_window(saved + 64,  buf + 4*128 + 64, buf + 5*128, swindow, 0, 64);
1745
-        ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64);
1746
-        ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64);
1744
+        ac->dsp.vector_fmul_window(saved + 64,  buf + 4*128 + 64, buf + 5*128, swindow, 64);
1745
+        ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
1746
+        ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
1747 1747
         memcpy(                    saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
1748 1748
     } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
1749 1749
         memcpy(                    saved,       buf + 512,        448 * sizeof(float));
... ...
@@ -628,13 +628,13 @@ static inline void do_imdct(AC3DecodeContext *s, int channels)
628 628
             for(i=0; i<128; i++)
629 629
                 x[i] = s->transform_coeffs[ch][2*i];
630 630
             ff_imdct_half(&s->imdct_256, s->tmp_output, x);
631
-            s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 0, 128);
631
+            s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 128);
632 632
             for(i=0; i<128; i++)
633 633
                 x[i] = s->transform_coeffs[ch][2*i+1];
634 634
             ff_imdct_half(&s->imdct_256, s->delay[ch-1], x);
635 635
         } else {
636 636
             ff_imdct_half(&s->imdct_512, s->tmp_output, s->transform_coeffs[ch]);
637
-            s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 0, 128);
637
+            s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 128);
638 638
             memcpy(s->delay[ch-1], s->tmp_output+128, 128*sizeof(float));
639 639
         }
640 640
     }
... ...
@@ -23,7 +23,7 @@
23 23
 #include "libavcodec/dcadsp.h"
24 24
 
25 25
 void ff_dca_lfe_fir_neon(float *out, const float *in, const float *coefs,
26
-                         int decifactor, float scale, float bias);
26
+                         int decifactor, float scale);
27 27
 
28 28
 void av_cold ff_dcadsp_init_arm(DCADSPContext *s)
29 29
 {
... ...
@@ -29,7 +29,7 @@ function ff_dca_lfe_fir_neon, export=1
29 29
         cmp             r3,  #32
30 30
         moveq           r6,  #256/32
31 31
         movne           r6,  #256/64
32
-NOVFP   vldr            d0,  [sp, #16]          @ scale, bias
32
+NOVFP   vldr            s0,  [sp, #16]          @ scale
33 33
         mov             lr,  #-16
34 34
 1:
35 35
         vmov.f32        q2,  #0.0               @ v0
... ...
@@ -51,8 +51,7 @@ NOVFP   vldr            d0,  [sp, #16]          @ scale, bias
51 51
         vadd.f32        d4,  d4,  d5
52 52
         vadd.f32        d6,  d6,  d7
53 53
         vpadd.f32       d4,  d4,  d6
54
-        vdup.32         d5,  d0[1]
55
-        vmla.f32        d5,  d4,  d0[0]
54
+        vmul.f32        d5,  d4,  d0[0]
56 55
         vst1.32         {d5[0]},  [r0,:32]!
57 56
         vst1.32         {d5[1]},  [r4,:32]!
58 57
         bne             1b
... ...
@@ -140,8 +140,7 @@ void ff_vp3_h_loop_filter_neon(uint8_t *, int, int *);
140 140
 
141 141
 void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len);
142 142
 void ff_vector_fmul_window_neon(float *dst, const float *src0,
143
-                                const float *src1, const float *win,
144
-                                float add_bias, int len);
143
+                                const float *src1, const float *win, int len);
145 144
 void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
146 145
                                 int len);
147 146
 void ff_vector_fmul_sv_scalar_2_neon(float *dst, const float *src,
... ...
@@ -777,11 +777,8 @@ function ff_vector_fmul_neon, export=1
777 777
 endfunc
778 778
 
779 779
 function ff_vector_fmul_window_neon, export=1
780
-VFP     vdup.32         q8,  d0[0]
781
-NOVFP   vld1.32         {d16[],d17[]}, [sp,:32]
782 780
         push            {r4,r5,lr}
783
-VFP     ldr             lr,  [sp, #12]
784
-NOVFP   ldr             lr,  [sp, #16]
781
+        ldr             lr,  [sp, #12]
785 782
         sub             r2,  r2,  #8
786 783
         sub             r5,  lr,  #2
787 784
         add             r2,  r2,  r5, lsl #2
... ...
@@ -793,14 +790,12 @@ NOVFP   ldr             lr,  [sp, #16]
793 793
         vld1.64         {d4,d5},  [r3,:128]!
794 794
         vld1.64         {d6,d7},  [r4,:128], r5
795 795
 1:      subs            lr,  lr,  #4
796
-        vmov            q11, q8
797
-        vmla.f32        d22, d0,  d4
798
-        vmov            q10, q8
799
-        vmla.f32        d23, d1,  d5
796
+        vmul.f32        d22, d0,  d4
800 797
         vrev64.32       q3,  q3
801
-        vmla.f32        d20, d0,  d7
798
+        vmul.f32        d23, d1,  d5
802 799
         vrev64.32       q1,  q1
803
-        vmla.f32        d21, d1,  d6
800
+        vmul.f32        d20, d0,  d7
801
+        vmul.f32        d21, d1,  d6
804 802
         beq             2f
805 803
         vmla.f32        d22, d3,  d7
806 804
         vld1.64         {d0,d1},  [r1,:128]!
... ...
@@ -34,7 +34,7 @@ void ff_synth_filter_float_neon(FFTContext *imdct,
34 34
                                 float *synth_buf_ptr, int *synth_buf_offset,
35 35
                                 float synth_buf2[32], const float window[512],
36 36
                                 float out[32], const float in[32],
37
-                                float scale, float bias);
37
+                                float scale);
38 38
 
39 39
 av_cold void ff_fft_init_arm(FFTContext *s)
40 40
 {
... ...
@@ -42,7 +42,7 @@ VFP     vpop            {d0}
42 42
 
43 43
         ldr             r5,  [sp, #9*4]         @ window
44 44
         ldr             r2,  [sp, #10*4]        @ out
45
-NOVFP   vldr            d0,  [sp, #12*4]        @ scale, bias
45
+NOVFP   vldr            s0,  [sp, #12*4]        @ scale
46 46
         add             r8,  r9,  #12*4
47 47
 
48 48
         mov             lr,  #64*4
... ...
@@ -90,10 +90,8 @@ NOVFP   vldr            d0,  [sp, #12*4]        @ scale, bias
90 90
         sub             r11, r11, #512*4
91 91
         b               2b
92 92
 3:
93
-        vdup.32         q8,  d0[1]
94
-        vdup.32         q9,  d0[1]
95
-        vmla.f32        q8,  q10, d0[0]
96
-        vmla.f32        q9,  q1,  d0[0]
93
+        vmul.f32        q8,  q10, d0[0]
94
+        vmul.f32        q9,  q1,  d0[0]
97 95
         vst1.32         {q3},     [r3,:128]
98 96
         sub             r3,  r3,  #16*4
99 97
         vst1.32         {q2},     [r3,:128]
... ...
@@ -141,7 +141,7 @@ static int at1_imdct_block(AT1SUCtx* su, AT1Ctx *q)
141 141
 
142 142
             /* overlap and window */
143 143
             q->dsp.vector_fmul_window(&q->bands[band_num][start_pos], prev_buf,
144
-                                      &su->spectrum[0][ref_pos + start_pos], ff_sine_32, 0, 16);
144
+                                      &su->spectrum[0][ref_pos + start_pos], ff_sine_32, 16);
145 145
 
146 146
             prev_buf = &su->spectrum[0][ref_pos+start_pos + 16];
147 147
             start_pos += block_size;
... ...
@@ -896,7 +896,7 @@ static void qmf_32_subbands(DCAContext * s, int chans,
896 896
         s->synth.synth_filter_float(&s->imdct,
897 897
                               s->subband_fir_hist[chans], &s->hist_index[chans],
898 898
                               s->subband_fir_noidea[chans], prCoeff,
899
-                              samples_out, s->raXin, scale, 0);
899
+                              samples_out, s->raXin, scale);
900 900
         samples_out+= 32;
901 901
 
902 902
     }
... ...
@@ -929,7 +929,7 @@ static void lfe_interpolation_fir(DCAContext *s, int decimation_select,
929 929
     /* Interpolation */
930 930
     for (deciindex = 0; deciindex < num_deci_sample; deciindex++) {
931 931
         s->dcadsp.lfe_fir(samples_out, samples_in, prCoeff, decifactor,
932
-                          scale, 0);
932
+                          scale);
933 933
         samples_in++;
934 934
         samples_out += 2 * decifactor;
935 935
     }
... ...
@@ -23,7 +23,7 @@
23 23
 #include "dcadsp.h"
24 24
 
25 25
 static void dca_lfe_fir_c(float *out, const float *in, const float *coefs,
26
-                          int decifactor, float scale, float bias)
26
+                          int decifactor, float scale)
27 27
 {
28 28
     float *out2 = out + decifactor;
29 29
     const float *cf0 = coefs;
... ...
@@ -39,8 +39,8 @@ static void dca_lfe_fir_c(float *out, const float *in, const float *coefs,
39 39
             v0 += s * *cf0++;
40 40
             v1 += s * *--cf1;
41 41
         }
42
-        *out++  = (v0 * scale) + bias;
43
-        *out2++ = (v1 * scale) + bias;
42
+        *out++  = v0 * scale;
43
+        *out2++ = v1 * scale;
44 44
     }
45 45
 }
46 46
 
... ...
@@ -21,7 +21,7 @@
21 21
 
22 22
 typedef struct DCADSPContext {
23 23
     void (*lfe_fir)(float *out, const float *in, const float *coefs,
24
-                    int decifactor, float scale, float bias);
24
+                    int decifactor, float scale);
25 25
 } DCADSPContext;
26 26
 
27 27
 void ff_dcadsp_init(DCADSPContext *s);
... ...
@@ -3776,7 +3776,9 @@ static void vector_fmul_add_c(float *dst, const float *src0, const float *src1,
3776 3776
         dst[i] = src0[i] * src1[i] + src2[i];
3777 3777
 }
3778 3778
 
3779
-void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len){
3779
+static void vector_fmul_window_c(float *dst, const float *src0,
3780
+                                 const float *src1, const float *win, int len)
3781
+{
3780 3782
     int i,j;
3781 3783
     dst += len;
3782 3784
     win += len;
... ...
@@ -3786,8 +3788,8 @@ void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, c
3786 3786
         float s1 = src1[j];
3787 3787
         float wi = win[i];
3788 3788
         float wj = win[j];
3789
-        dst[i] = s0*wj - s1*wi + add_bias;
3790
-        dst[j] = s0*wi + s1*wj + add_bias;
3789
+        dst[i] = s0*wj - s1*wi;
3790
+        dst[j] = s0*wi + s1*wj;
3791 3791
     }
3792 3792
 }
3793 3793
 
... ...
@@ -4434,7 +4436,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
4434 4434
     c->vector_fmul = vector_fmul_c;
4435 4435
     c->vector_fmul_reverse = vector_fmul_reverse_c;
4436 4436
     c->vector_fmul_add = vector_fmul_add_c;
4437
-    c->vector_fmul_window = ff_vector_fmul_window_c;
4437
+    c->vector_fmul_window = vector_fmul_window_c;
4438 4438
     c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
4439 4439
     c->vector_clipf = vector_clipf_c;
4440 4440
     c->float_to_int16 = ff_float_to_int16_c;
... ...
@@ -68,9 +68,6 @@ void ff_h264_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qmul);
68 68
 void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp);
69 69
 void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
70 70
 
71
-void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1,
72
-                             const float *win, float add_bias, int len);
73
-
74 71
 /* encoding scans */
75 72
 extern const uint8_t ff_alternate_horizontal_scan[64];
76 73
 extern const uint8_t ff_alternate_vertical_scan[64];
... ...
@@ -393,7 +390,7 @@ typedef struct DSPContext {
393 393
     /* assume len is a multiple of 8, and src arrays are 16-byte aligned */
394 394
     void (*vector_fmul_add)(float *dst, const float *src0, const float *src1, const float *src2, int len);
395 395
     /* assume len is a multiple of 4, and arrays are 16-byte aligned */
396
-    void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len);
396
+    void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, int len);
397 397
     /* assume len is a multiple of 8, and arrays are 16-byte aligned */
398 398
     void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len);
399 399
     void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
... ...
@@ -90,13 +90,9 @@ static void vector_fmul_add_altivec(float *dst, const float *src0,
90 90
     }
91 91
 }
92 92
 
93
-static void vector_fmul_window_altivec(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len)
93
+static void vector_fmul_window_altivec(float *dst, const float *src0, const float *src1, const float *win, int len)
94 94
 {
95
-    union {
96
-        vector float v;
97
-        float s[4];
98
-    } vadd;
99
-    vector float vadd_bias, zero, t0, t1, s0, s1, wi, wj;
95
+    vector float zero, t0, t1, s0, s1, wi, wj;
100 96
     const vector unsigned char reverse = vcprm(3,2,1,0);
101 97
     int i,j;
102 98
 
... ...
@@ -104,8 +100,6 @@ static void vector_fmul_window_altivec(float *dst, const float *src0, const floa
104 104
     win += len;
105 105
     src0+= len;
106 106
 
107
-    vadd.s[0] = add_bias;
108
-    vadd_bias = vec_splat(vadd.v, 0);
109 107
     zero = (vector float)vec_splat_u32(0);
110 108
 
111 109
     for(i=-len*4, j=len*4-16; i<0; i+=16, j-=16) {
... ...
@@ -117,9 +111,9 @@ static void vector_fmul_window_altivec(float *dst, const float *src0, const floa
117 117
         s1 = vec_perm(s1, s1, reverse);
118 118
         wj = vec_perm(wj, wj, reverse);
119 119
 
120
-        t0 = vec_madd(s0, wj, vadd_bias);
120
+        t0 = vec_madd(s0, wj, zero);
121 121
         t0 = vec_nmsub(s1, wi, t0);
122
-        t1 = vec_madd(s0, wi, vadd_bias);
122
+        t1 = vec_madd(s0, wi, zero);
123 123
         t1 = vec_madd(s1, wj, t1);
124 124
         t1 = vec_perm(t1, t1, reverse);
125 125
 
... ...
@@ -24,7 +24,7 @@
24 24
 static void synth_filter_float(FFTContext *imdct,
25 25
                            float *synth_buf_ptr, int *synth_buf_offset,
26 26
                            float synth_buf2[32], const float window[512],
27
-                           float out[32], const float in[32], float scale, float bias)
27
+                           float out[32], const float in[32], float scale)
28 28
 {
29 29
     float *synth_buf= synth_buf_ptr + *synth_buf_offset;
30 30
     int i, j;
... ...
@@ -48,8 +48,8 @@ static void synth_filter_float(FFTContext *imdct,
48 48
             c += window[i + j + 32]*( synth_buf[16 + i + j - 512]);
49 49
             d += window[i + j + 48]*( synth_buf[31 - i + j - 512]);
50 50
         }
51
-        out[i     ] = a*scale + bias;
52
-        out[i + 16] = b*scale + bias;
51
+        out[i     ] = a*scale;
52
+        out[i + 16] = b*scale;
53 53
         synth_buf2[i     ] = c;
54 54
         synth_buf2[i + 16] = d;
55 55
     }
... ...
@@ -28,7 +28,7 @@ typedef struct SynthFilterContext {
28 28
                                float *synth_buf_ptr, int *synth_buf_offset,
29 29
                                float synth_buf2[32], const float window[512],
30 30
                                float out[32], const float in[32],
31
-                               float scale, float bias);
31
+                               float scale);
32 32
 } SynthFilterContext;
33 33
 
34 34
 void ff_synth_filter_init(SynthFilterContext *c);
... ...
@@ -646,7 +646,6 @@ static void imdct_and_window(TwinContext *tctx, enum FrameType ftype, int wtype,
646 646
                                      prev_buf + (bsize-wsize)/2,
647 647
                                      buf1 + bsize*j,
648 648
                                      ff_sine_windows[av_log2(wsize)],
649
-                                     0.0,
650 649
                                      wsize/2);
651 650
         out2 += wsize;
652 651
 
... ...
@@ -1575,13 +1575,13 @@ static int vorbis_parse_audio_packet(vorbis_context *vc)
1575 1575
         const float *win  = vc->win[blockflag & previous_window];
1576 1576
 
1577 1577
         if (blockflag == previous_window) {
1578
-            vc->dsp.vector_fmul_window(ret, saved, buf, win, 0, blocksize / 4);
1578
+            vc->dsp.vector_fmul_window(ret, saved, buf, win, blocksize / 4);
1579 1579
         } else if (blockflag > previous_window) {
1580
-            vc->dsp.vector_fmul_window(ret, saved, buf, win, 0, bs0 / 4);
1580
+            vc->dsp.vector_fmul_window(ret, saved, buf, win, bs0 / 4);
1581 1581
             memcpy(ret+bs0/2, buf+bs0/4, ((bs1-bs0)/4) * sizeof(float));
1582 1582
         } else {
1583 1583
             memcpy(ret, saved, ((bs1 - bs0) / 4) * sizeof(float));
1584
-            vc->dsp.vector_fmul_window(ret + (bs1 - bs0) / 4, saved + (bs1 - bs0) / 4, buf, win, 0, bs0 / 4);
1584
+            vc->dsp.vector_fmul_window(ret + (bs1 - bs0) / 4, saved + (bs1 - bs0) / 4, buf, win, bs0 / 4);
1585 1585
         }
1586 1586
         memcpy(saved, buf + blocksize / 4, blocksize / 4 * sizeof(float));
1587 1587
     }
... ...
@@ -1031,7 +1031,7 @@ static void wmapro_window(WMAProDecodeCtx *s)
1031 1031
         winlen >>= 1;
1032 1032
 
1033 1033
         s->dsp.vector_fmul_window(start, start, start + winlen,
1034
-                                  window, 0, winlen);
1034
+                                  window, winlen);
1035 1035
 
1036 1036
         s->channel[c].prev_block_len = s->subframe_len;
1037 1037
     }
... ...
@@ -2190,10 +2190,9 @@ static void vector_fmul_add_sse(float *dst, const float *src0, const float *src1
2190 2190
     );
2191 2191
 }
2192 2192
 
2193
-static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float *src1,
2194
-                                      const float *win, float add_bias, int len){
2195 2193
 #if HAVE_6REGS
2196
-    if(add_bias == 0){
2194
+static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float *src1,
2195
+                                      const float *win, int len){
2197 2196
         x86_reg i = -len*4;
2198 2197
         x86_reg j = len*4-8;
2199 2198
         __asm__ volatile(
... ...
@@ -2220,15 +2219,10 @@ static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float
2220 2220
             :"+r"(i), "+r"(j)
2221 2221
             :"r"(dst+len), "r"(src0+len), "r"(src1), "r"(win+len)
2222 2222
         );
2223
-    }else
2224
-#endif
2225
-        ff_vector_fmul_window_c(dst, src0, src1, win, add_bias, len);
2226 2223
 }
2227 2224
 
2228 2225
 static void vector_fmul_window_sse(float *dst, const float *src0, const float *src1,
2229
-                                   const float *win, float add_bias, int len){
2230
-#if HAVE_6REGS
2231
-    if(add_bias == 0){
2226
+                                   const float *win, int len){
2232 2227
         x86_reg i = -len*4;
2233 2228
         x86_reg j = len*4-16;
2234 2229
         __asm__ volatile(
... ...
@@ -2256,10 +2250,8 @@ static void vector_fmul_window_sse(float *dst, const float *src0, const float *s
2256 2256
             :"+r"(i), "+r"(j)
2257 2257
             :"r"(dst+len), "r"(src0+len), "r"(src1), "r"(win+len)
2258 2258
         );
2259
-    }else
2260
-#endif
2261
-        ff_vector_fmul_window_c(dst, src0, src1, win, add_bias, len);
2262 2259
 }
2260
+#endif /* HAVE_6REGS */
2263 2261
 
2264 2262
 static void int32_to_float_fmul_scalar_sse(float *dst, const int *src, float mul, int len)
2265 2263
 {
... ...
@@ -2882,7 +2874,9 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2882 2882
         }
2883 2883
         if(mm_flags & AV_CPU_FLAG_3DNOWEXT){
2884 2884
             c->vector_fmul_reverse = vector_fmul_reverse_3dnow2;
2885
+#if HAVE_6REGS
2885 2886
             c->vector_fmul_window = vector_fmul_window_3dnow2;
2887
+#endif
2886 2888
             if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
2887 2889
                 c->float_to_int16_interleave = float_to_int16_interleave_3dn2;
2888 2890
             }
... ...
@@ -2899,7 +2893,9 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2899 2899
             c->vector_fmul = vector_fmul_sse;
2900 2900
             c->vector_fmul_reverse = vector_fmul_reverse_sse;
2901 2901
             c->vector_fmul_add = vector_fmul_add_sse;
2902
+#if HAVE_6REGS
2902 2903
             c->vector_fmul_window = vector_fmul_window_sse;
2904
+#endif
2903 2905
             c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse;
2904 2906
             c->vector_clipf = vector_clipf_sse;
2905 2907
             c->float_to_int16 = float_to_int16_sse;