GitList

Browse code

float_dsp: Move vector_fmac_scalar() from libavcodec to libavutil

Justin Ruggles authored on 2012/06/09 02:49:56
Showing 9 changed files

libavcodec/arm/dsputil_init_neon.c index 398326c..65db20d 100644
libavcodec/arm/dsputil_neon.S index 9a5a40d..358ed61 100644
libavcodec/dca.c index 103f058..b37dc49 100644
libavcodec/dsputil.c index 942f606..15f184e 100644
libavcodec/dsputil.h index ec3d7ee..e54ae69 100644
libavutil/arm/float_dsp_init_neon.c index fa6d0d7..3ca0288 100644
libavutil/arm/float_dsp_neon.S index d66fa09..03b1643 100644
libavutil/float_dsp.c index 039dd07..2e90939 100644
libavutil/float_dsp.h index 30161a2..4e26630 100644

@@ -154,8 +154,6 @@ void ff_vector_fmul_window_neon(float *dst, const float *src0,
                                                      const float *src1, const float *win, int len);
                      void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
                                                      int len);
                     -void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
                     -                                int len);
                      void ff_butterflies_float_neon(float *v1, float *v2, int len);
                      float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len);
                      void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
@@ -329,7 +327,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
                          c->vector_fmul_window         = ff_vector_fmul_window_neon;
                          c->vector_fmul_scalar         = ff_vector_fmul_scalar_neon;
                     -    c->vector_fmac_scalar         = ff_vector_fmac_scalar_neon;
                          c->butterflies_float          = ff_butterflies_float_neon;
                          c->scalarproduct_float        = ff_scalarproduct_float_neon;
                          c->vector_fmul_reverse        = ff_vector_fmul_reverse_neon;

libavcodec/arm/dsputil_neon.S

History View file @ cb5042d

@@ -682,54 +682,6 @@ NOVFP   vdup.32         q8,  r2
                              .unreq          len
                      endfunc
                     -function ff_vector_fmac_scalar_neon, export=1
                     -VFP     len .req r2
                     -VFP     acc .req r3
                     -NOVFP   len .req r3
                     -NOVFP   acc .req r2
                     -VFP     vdup.32         q15, d0[0]
                     -NOVFP   vdup.32         q15, r2
                     -        bics            r12, len, #15
                     -        mov             acc, r0
                     -        beq             3f
                     -        vld1.32         {q0},     [r1,:128]!
                     -        vld1.32         {q8},     [acc,:128]!
                     -        vld1.32         {q1},     [r1,:128]!
                     -        vld1.32         {q9},     [acc,:128]!
                     -1:      vmla.f32        q8,  q0,  q15
                     -        vld1.32         {q2},     [r1,:128]!
                     -        vld1.32         {q10},    [acc,:128]!
                     -        vmla.f32        q9,  q1,  q15
                     -        vld1.32         {q3},     [r1,:128]!
                     -        vld1.32         {q11},    [acc,:128]!
                     -        vmla.f32        q10, q2,  q15
                     -        vst1.32         {q8},     [r0,:128]!
                     -        vmla.f32        q11, q3,  q15
                     -        vst1.32         {q9},     [r0,:128]!
                     -        subs            r12, r12, #16
                     -        beq             2f
                     -        vld1.32         {q0},     [r1,:128]!
                     -        vld1.32         {q8},     [acc,:128]!
                     -        vst1.32         {q10},    [r0,:128]!
                     -        vld1.32         {q1},     [r1,:128]!
                     -        vld1.32         {q9},     [acc,:128]!
                     -        vst1.32         {q11},    [r0,:128]!
                     -        b               1b
                     -2:      vst1.32         {q10},    [r0,:128]!
                     -        vst1.32         {q11},    [r0,:128]!
                     -        ands            len, len, #15
                     -        it              eq
                     -        bxeq            lr
                     -3:      vld1.32         {q0},     [r1,:128]!
                     -        vld1.32         {q8},     [acc,:128]!
                     -        vmla.f32        q8,  q0,  q15
                     -        vst1.32         {q8},     [r0,:128]!
                     -        subs            len, len, #4
                     -        bgt             3b
                     -        bx              lr
                     -        .unreq          len
                     -endfunc
                     -
                      function ff_butterflies_float_neon, export=1
                      1:      vld1.32         {q0},[r0,:128]
                              vld1.32         {q1},[r1,:128]

libavcodec/dca.c

History View file @ cb5042d

@@ -27,6 +27,7 @@
                      #include <stdio.h>
                      #include "libavutil/common.h"
                     +#include "libavutil/float_dsp.h"
                      #include "libavutil/intmath.h"
                      #include "libavutil/intreadwrite.h"
                      #include "libavutil/mathematics.h"
@@ -383,7 +384,7 @@ typedef struct {
                          int profile;
                          int debug_flag;             ///< used for suppressing repeated error messages output
                     -    DSPContext dsp;
                     +    AVFloatDSPContext fdsp;
                          FFTContext imdct;
                          SynthFilterContext synth;
                          DCADSPContext dcadsp;
@@ -1865,8 +1866,8 @@ static int dca_decode_frame(AVCodecContext *avctx, void *data,
                                  float *back_chan = s->samples + s->channel_order_tab[s->xch_base_channel]     * 256;
                                  float *lt_chan   = s->samples + s->channel_order_tab[s->xch_base_channel - 2] * 256;
                                  float *rt_chan   = s->samples + s->channel_order_tab[s->xch_base_channel - 1] * 256;
                     -            s->dsp.vector_fmac_scalar(lt_chan, back_chan, -M_SQRT1_2, 256);
                     -            s->dsp.vector_fmac_scalar(rt_chan, back_chan, -M_SQRT1_2, 256);
                     +            s->fdsp.vector_fmac_scalar(lt_chan, back_chan, -M_SQRT1_2, 256);
                     +            s->fdsp.vector_fmac_scalar(rt_chan, back_chan, -M_SQRT1_2, 256);
                              }
                              if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) {
@@ -1908,7 +1909,7 @@ static av_cold int dca_decode_init(AVCodecContext *avctx)
                          s->avctx = avctx;
                          dca_init_vlcs();
                     -    ff_dsputil_init(&s->dsp, avctx);
                     +    avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
                          ff_mdct_init(&s->imdct, 6, 1, 1.0);
                          ff_synth_filter_init(&s->synth);
                          ff_dcadsp_init(&s->dcadsp);

libavcodec/dsputil.c

History View file @ cb5042d

@@ -2401,14 +2401,6 @@ static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                              dst[i] = src[i] * mul;
                      }
                     -static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
                     -                                 int len)
                     -{
                     -    int i;
                     -    for (i = 0; i < len; i++)
                     -        dst[i] += src[i] * mul;
                     -}
                     -
                      static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                                      int len)
                      {
@@ -2904,7 +2896,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
                          c->butterflies_float = butterflies_float_c;
                          c->butterflies_float_interleave = butterflies_float_interleave_c;
                          c->vector_fmul_scalar = vector_fmul_scalar_c;
                     -    c->vector_fmac_scalar = vector_fmac_scalar_c;
                          c->shrink[0]= av_image_copy_plane;
                          c->shrink[1]= ff_shrink22;

libavcodec/dsputil.h

History View file @ cb5042d

@@ -417,17 +417,6 @@ typedef struct DSPContext {
                          void (*vector_fmul_scalar)(float *dst, const float *src, float mul,
                                                     int len);
                          /**
                     -     * Multiply a vector of floats by a scalar float and add to
                     -     * destination vector.  Source and destination vectors must
                     -     * overlap exactly or not at all.
                     -     * @param dst result vector, 16-byte aligned
                     -     * @param src input vector, 16-byte aligned
                     -     * @param mul scalar value
                     -     * @param len length of vector, multiple of 4
                     -     */
                     -    void (*vector_fmac_scalar)(float *dst, const float *src, float mul,
                     -                               int len);
                     -    /**
                           * Calculate the scalar product of two vectors of floats.
                           * @param v1  first vector, 16-byte aligned
                           * @param v2  second vector, 16-byte aligned

libavutil/arm/float_dsp_init_neon.c

History View file @ cb5042d

@@ -26,7 +26,11 @@
                      void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len);
                     +void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
                     +                                int len);
                     +
                      void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
                      {
                          fdsp->vector_fmul = ff_vector_fmul_neon;
                     +    fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_neon;
                      }

libavutil/arm/float_dsp_neon.S

History View file @ cb5042d

@@ -62,3 +62,51 @@ function ff_vector_fmul_neon, export=1
                      3:      vst1.32         {d16-d19},[r0,:128]!
                              bx              lr
                      endfunc
                     +
                     +function ff_vector_fmac_scalar_neon, export=1
                     +VFP     len .req r2
                     +VFP     acc .req r3
                     +NOVFP   len .req r3
                     +NOVFP   acc .req r2
                     +VFP     vdup.32         q15, d0[0]
                     +NOVFP   vdup.32         q15, r2
                     +        bics            r12, len, #15
                     +        mov             acc, r0
                     +        beq             3f
                     +        vld1.32         {q0},     [r1,:128]!
                     +        vld1.32         {q8},     [acc,:128]!
                     +        vld1.32         {q1},     [r1,:128]!
                     +        vld1.32         {q9},     [acc,:128]!
                     +1:      vmla.f32        q8,  q0,  q15
                     +        vld1.32         {q2},     [r1,:128]!
                     +        vld1.32         {q10},    [acc,:128]!
                     +        vmla.f32        q9,  q1,  q15
                     +        vld1.32         {q3},     [r1,:128]!
                     +        vld1.32         {q11},    [acc,:128]!
                     +        vmla.f32        q10, q2,  q15
                     +        vst1.32         {q8},     [r0,:128]!
                     +        vmla.f32        q11, q3,  q15
                     +        vst1.32         {q9},     [r0,:128]!
                     +        subs            r12, r12, #16
                     +        beq             2f
                     +        vld1.32         {q0},     [r1,:128]!
                     +        vld1.32         {q8},     [acc,:128]!
                     +        vst1.32         {q10},    [r0,:128]!
                     +        vld1.32         {q1},     [r1,:128]!
                     +        vld1.32         {q9},     [acc,:128]!
                     +        vst1.32         {q11},    [r0,:128]!
                     +        b               1b
                     +2:      vst1.32         {q10},    [r0,:128]!
                     +        vst1.32         {q11},    [r0,:128]!
                     +        ands            len, len, #15
                     +        it              eq
                     +        bxeq            lr
                     +3:      vld1.32         {q0},     [r1,:128]!
                     +        vld1.32         {q8},     [acc,:128]!
                     +        vmla.f32        q8,  q0,  q15
                     +        vst1.32         {q8},     [r0,:128]!
                     +        subs            len, len, #4
                     +        bgt             3b
                     +        bx              lr
                     +        .unreq          len
                     +endfunc

libavutil/float_dsp.c

History View file @ cb5042d

@@ -28,9 +28,18 @@ static void vector_fmul_c(float *dst, const float *src0, const float *src1,
                              dst[i] = src0[i] * src1[i];
                      }
                     +static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
                     +                                 int len)
                     +{
                     +    int i;
                     +    for (i = 0; i < len; i++)
                     +        dst[i] += src[i] * mul;
                     +}
                     +
                      void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
                      {
                          fdsp->vector_fmul = vector_fmul_c;
                     +    fdsp->vector_fmac_scalar = vector_fmac_scalar_c;
                      #if ARCH_ARM
                          ff_float_dsp_init_arm(fdsp);

libavutil/float_dsp.h

History View file @ cb5042d

@@ -35,6 +35,22 @@ typedef struct AVFloatDSPContext {
                           */
                          void (*vector_fmul)(float *dst, const float *src0, const float *src1,
                                              int len);
                     +
                     +    /**
                     +     * Multiply a vector of floats by a scalar float and add to
                     +     * destination vector.  Source and destination vectors must
                     +     * overlap exactly or not at all.
                     +     *
                     +     * @param dst result vector
                     +     *            constraints: 16-byte aligned
                     +     * @param src input vector
                     +     *            constraints: 16-byte aligned
                     +     * @param mul scalar value
                     +     * @param len length of vector
                     +     *            constraints: multiple of 4
                     +     */
                     +    void (*vector_fmac_scalar)(float *dst, const float *src, float mul,
                     +                               int len);
                      } AVFloatDSPContext;
                      /**