GitList

Browse code

ra144: use scalarproduct_int16

The buffer holding the coefficients must be padded with zeros so that it can be
passed to DSP functions that may overread. The SSE2/3 versions are a current
example, as they process batches of 16 bytes.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>

Christophe Gisquet authored on 2012/03/04 21:28:16
Showing 4 changed files

libavcodec/ra144.c index fe9a5bc..9929721 100644
libavcodec/ra144.h index 763495d..c2ee59b 100644
libavcodec/ra144dec.c index b7add7f..03ab9f5 100644
libavcodec/ra144enc.c index 3558254..71f206f 100644

libavcodec/ra144.c

History View file @ c3390fd

@@ -1681,12 +1681,9 @@ unsigned int ff_rescale_rms(unsigned int rms, unsigned int energy)
+                     }
                      /** inverse root mean square */
                     -int ff_irms(const int16_t *data)
                     +int ff_irms(DSPContext *dsp, const int16_t *data)
+                     {
                     -    unsigned int i, sum = 0;
+                    -
                     -    for (i=0; i < BLOCKSIZE; i++)
                     -        sum += data[i] * data[i];
                     +    unsigned int sum = dsp->scalarproduct_int16(data, data, BLOCKSIZE);
                          if (sum == 0)
                              return 0; /* OOPS - division by zero */
@@ -1698,14 +1695,13 @@ void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs,
                                                 int cba_idx, int cb1_idx, int cb2_idx,
                                                 int gval, int gain)
+                     {
                     -    int16_t buffer_a[BLOCKSIZE];
                          int16_t *block;
                          int m[3];
                          if (cba_idx) {
                              cba_idx += BLOCKSIZE/2 - 1;
                     -        ff_copy_and_dup(buffer_a, ractx->adapt_cb, cba_idx);
                     -        m[0] = (ff_irms(buffer_a) * gval) >> 12;
                     +        ff_copy_and_dup(ractx->buffer_a, ractx->adapt_cb, cba_idx);
                     +        m[0] = (ff_irms(&ractx->dsp, ractx->buffer_a) * gval) >> 12;
                          } else {
                              m[0] = 0;
+                         }
@@ -1716,7 +1712,7 @@ void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs,
                          block = ractx->adapt_cb + BUFFERSIZE - BLOCKSIZE;
                     -    add_wav(block, gain, cba_idx, m, cba_idx? buffer_a: NULL,
                     +    add_wav(block, gain, cba_idx, m, cba_idx? ractx->buffer_a: NULL,
                                  ff_cb1_vects[cb1_idx], ff_cb2_vects[cb2_idx]);
                          memcpy(ractx->curr_sblock, ractx->curr_sblock + BLOCKSIZE,

libavcodec/ra144.h

History View file @ c3390fd

@@ -25,6 +25,7 @@
                      #include <stdint.h>
                      #include "lpc.h"
                      #include "audio_frame_queue.h"
                     +#include "dsputil.h"
                      #define NBLOCKS         4       ///< number of subblocks within a block
                      #define BLOCKSIZE       40      ///< subblock size in 16-bit words
@@ -35,6 +36,7 @@
                      typedef struct RA144Context {
                          AVCodecContext *avctx;
                     +    DSPContext dsp;
                          LPCContext lpc_ctx;
                          AudioFrameQueue afq;
                          int last_frame;
@@ -57,6 +59,8 @@ typedef struct RA144Context {
                          /** Adaptive codebook, its size is two units bigger to avoid a
                           *  buffer overflow. */
                          int16_t adapt_cb[146+2];
+                    +
                     +    DECLARE_ALIGNED(16, int16_t, buffer_a)[FFALIGN(BLOCKSIZE,16)];
                      } RA144Context;
                      void ff_copy_and_dup(int16_t *target, const int16_t *source, int offset);
@@ -68,7 +72,7 @@ unsigned int ff_rms(const int *data);
                      int ff_interp(RA144Context *ractx, int16_t *out, int a, int copyold,
                                    int energy);
                      unsigned int ff_rescale_rms(unsigned int rms, unsigned int energy);
                     -int ff_irms(const int16_t *data);
                     +int ff_irms(DSPContext *dsp, const int16_t *data/*align 16*/);
                      void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs,
                                                 int cba_idx, int cb1_idx, int cb2_idx,
                                                 int gval, int gain);

libavcodec/ra144dec.c

History View file @ c3390fd

@@ -34,10 +34,13 @@ static av_cold int ra144_decode_init(AVCodecContext * avctx)
                          RA144Context *ractx = avctx->priv_data;
                          ractx->avctx = avctx;
                     +    ff_dsputil_init(&ractx->dsp, avctx);
                          ractx->lpc_coef[0] = ractx->lpc_tables[0];
                          ractx->lpc_coef[1] = ractx->lpc_tables[1];
                     +    AV_ZERO128(ractx->buffer_a+BLOCKSIZE);
+                    +
                          avctx->channels       = 1;
                          avctx->channel_layout = AV_CH_LAYOUT_MONO;
                          avctx->sample_fmt     = AV_SAMPLE_FMT_S16;

libavcodec/ra144enc.c

History View file @ c3390fd

@@ -60,7 +60,9 @@ static av_cold int ra144_encode_init(AVCodecContext * avctx)
                          ractx = avctx->priv_data;
                          ractx->lpc_coef[0] = ractx->lpc_tables[0];
                          ractx->lpc_coef[1] = ractx->lpc_tables[1];
                     +    AV_ZERO128(ractx->buffer_a+BLOCKSIZE);
                          ractx->avctx = avctx;
                     +    ff_dsputil_init(&ractx->dsp, avctx);
                          ret = ff_lpc_init(&ractx->lpc_ctx, avctx->frame_size, LPC_ORDER,
                                            FF_LPC_TYPE_LEVINSON);
                          if (ret < 0)
@@ -334,7 +336,6 @@ static void ra144_encode_subblock(RA144Context *ractx,
                          float data[BLOCKSIZE] = { 0 }, work[LPC_ORDER + BLOCKSIZE];
                          float coefs[LPC_ORDER];
                          float zero[BLOCKSIZE], cba[BLOCKSIZE], cb1[BLOCKSIZE], cb2[BLOCKSIZE];
                     -    int16_t cba_vect[BLOCKSIZE];
                          int cba_idx, cb1_idx, cb2_idx, gain;
                          int i, n;
                          unsigned m[3];
@@ -373,8 +374,8 @@ static void ra144_encode_subblock(RA144Context *ractx,
                               */
                              memcpy(cba, work + LPC_ORDER, sizeof(cba));
                     -        ff_copy_and_dup(cba_vect, ractx->adapt_cb, cba_idx + BLOCKSIZE / 2 - 1);
                     -        m[0] = (ff_irms(cba_vect) * rms) >> 12;
                     +        ff_copy_and_dup(ractx->buffer_a, ractx->adapt_cb, cba_idx + BLOCKSIZE / 2 - 1);
                     +        m[0] = (ff_irms(&ractx->dsp, ractx->buffer_a) * rms) >> 12;
+                         }
                          fixed_cb_search(work + LPC_ORDER, coefs, data, cba_idx, &cb1_idx, &cb2_idx);
                          for (i = 0; i < BLOCKSIZE; i++) {