Browse code

ra144: use scalarproduct_int16

The buffer holding the coefficients must be padded with 0 so that DSP
functions that may overread can be used on it. The SSE2/3 versions are a
current example, as they process batches of 16 bytes.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>

Christophe Gisquet authored on 2012/03/04 21:28:16
Showing 4 changed files
... ...
@@ -1681,12 +1681,9 @@ unsigned int ff_rescale_rms(unsigned int rms, unsigned int energy)
1681 1681
 }
1682 1682
 
1683 1683
 /** inverse root mean square */
1684
-int ff_irms(const int16_t *data)
1684
+int ff_irms(DSPContext *dsp, const int16_t *data)
1685 1685
 {
1686
-    unsigned int i, sum = 0;
1687
-
1688
-    for (i=0; i < BLOCKSIZE; i++)
1689
-        sum += data[i] * data[i];
1686
+    unsigned int sum = dsp->scalarproduct_int16(data, data, BLOCKSIZE);
1690 1687
 
1691 1688
     if (sum == 0)
1692 1689
         return 0; /* OOPS - division by zero */
... ...
@@ -1698,14 +1695,13 @@ void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs,
1698 1698
                            int cba_idx, int cb1_idx, int cb2_idx,
1699 1699
                            int gval, int gain)
1700 1700
 {
1701
-    int16_t buffer_a[BLOCKSIZE];
1702 1701
     int16_t *block;
1703 1702
     int m[3];
1704 1703
 
1705 1704
     if (cba_idx) {
1706 1705
         cba_idx += BLOCKSIZE/2 - 1;
1707
-        ff_copy_and_dup(buffer_a, ractx->adapt_cb, cba_idx);
1708
-        m[0] = (ff_irms(buffer_a) * gval) >> 12;
1706
+        ff_copy_and_dup(ractx->buffer_a, ractx->adapt_cb, cba_idx);
1707
+        m[0] = (ff_irms(&ractx->dsp, ractx->buffer_a) * gval) >> 12;
1709 1708
     } else {
1710 1709
         m[0] = 0;
1711 1710
     }
... ...
@@ -1716,7 +1712,7 @@ void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs,
1716 1716
 
1717 1717
     block = ractx->adapt_cb + BUFFERSIZE - BLOCKSIZE;
1718 1718
 
1719
-    add_wav(block, gain, cba_idx, m, cba_idx? buffer_a: NULL,
1719
+    add_wav(block, gain, cba_idx, m, cba_idx? ractx->buffer_a: NULL,
1720 1720
             ff_cb1_vects[cb1_idx], ff_cb2_vects[cb2_idx]);
1721 1721
 
1722 1722
     memcpy(ractx->curr_sblock, ractx->curr_sblock + BLOCKSIZE,
... ...
@@ -25,6 +25,7 @@
25 25
 #include <stdint.h>
26 26
 #include "lpc.h"
27 27
 #include "audio_frame_queue.h"
28
+#include "dsputil.h"
28 29
 
29 30
 #define NBLOCKS         4       ///< number of subblocks within a block
30 31
 #define BLOCKSIZE       40      ///< subblock size in 16-bit words
... ...
@@ -35,6 +36,7 @@
35 35
 
36 36
 typedef struct RA144Context {
37 37
     AVCodecContext *avctx;
38
+    DSPContext dsp;
38 39
     LPCContext lpc_ctx;
39 40
     AudioFrameQueue afq;
40 41
     int last_frame;
... ...
@@ -57,6 +59,8 @@ typedef struct RA144Context {
57 57
     /** Adaptive codebook, its size is two units bigger to avoid a
58 58
      *  buffer overflow. */
59 59
     int16_t adapt_cb[146+2];
60
+
61
+    DECLARE_ALIGNED(16, int16_t, buffer_a)[FFALIGN(BLOCKSIZE,16)];
60 62
 } RA144Context;
61 63
 
62 64
 void ff_copy_and_dup(int16_t *target, const int16_t *source, int offset);
... ...
@@ -68,7 +72,7 @@ unsigned int ff_rms(const int *data);
68 68
 int ff_interp(RA144Context *ractx, int16_t *out, int a, int copyold,
69 69
               int energy);
70 70
 unsigned int ff_rescale_rms(unsigned int rms, unsigned int energy);
71
-int ff_irms(const int16_t *data);
71
+int ff_irms(DSPContext *dsp, const int16_t *data/*align 16*/);
72 72
 void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs,
73 73
                            int cba_idx, int cb1_idx, int cb2_idx,
74 74
                            int gval, int gain);
... ...
@@ -34,10 +34,13 @@ static av_cold int ra144_decode_init(AVCodecContext * avctx)
34 34
     RA144Context *ractx = avctx->priv_data;
35 35
 
36 36
     ractx->avctx = avctx;
37
+    ff_dsputil_init(&ractx->dsp, avctx);
37 38
 
38 39
     ractx->lpc_coef[0] = ractx->lpc_tables[0];
39 40
     ractx->lpc_coef[1] = ractx->lpc_tables[1];
40 41
 
42
+    AV_ZERO128(ractx->buffer_a+BLOCKSIZE);
43
+
41 44
     avctx->channels       = 1;
42 45
     avctx->channel_layout = AV_CH_LAYOUT_MONO;
43 46
     avctx->sample_fmt     = AV_SAMPLE_FMT_S16;
... ...
@@ -60,7 +60,9 @@ static av_cold int ra144_encode_init(AVCodecContext * avctx)
60 60
     ractx = avctx->priv_data;
61 61
     ractx->lpc_coef[0] = ractx->lpc_tables[0];
62 62
     ractx->lpc_coef[1] = ractx->lpc_tables[1];
63
+    AV_ZERO128(ractx->buffer_a+BLOCKSIZE);
63 64
     ractx->avctx = avctx;
65
+    ff_dsputil_init(&ractx->dsp, avctx);
64 66
     ret = ff_lpc_init(&ractx->lpc_ctx, avctx->frame_size, LPC_ORDER,
65 67
                       FF_LPC_TYPE_LEVINSON);
66 68
     if (ret < 0)
... ...
@@ -334,7 +336,6 @@ static void ra144_encode_subblock(RA144Context *ractx,
334 334
     float data[BLOCKSIZE] = { 0 }, work[LPC_ORDER + BLOCKSIZE];
335 335
     float coefs[LPC_ORDER];
336 336
     float zero[BLOCKSIZE], cba[BLOCKSIZE], cb1[BLOCKSIZE], cb2[BLOCKSIZE];
337
-    int16_t cba_vect[BLOCKSIZE];
338 337
     int cba_idx, cb1_idx, cb2_idx, gain;
339 338
     int i, n;
340 339
     unsigned m[3];
... ...
@@ -373,8 +374,8 @@ static void ra144_encode_subblock(RA144Context *ractx,
373 373
          */
374 374
         memcpy(cba, work + LPC_ORDER, sizeof(cba));
375 375
 
376
-        ff_copy_and_dup(cba_vect, ractx->adapt_cb, cba_idx + BLOCKSIZE / 2 - 1);
377
-        m[0] = (ff_irms(cba_vect) * rms) >> 12;
376
+        ff_copy_and_dup(ractx->buffer_a, ractx->adapt_cb, cba_idx + BLOCKSIZE / 2 - 1);
377
+        m[0] = (ff_irms(&ractx->dsp, ractx->buffer_a) * rms) >> 12;
378 378
     }
379 379
     fixed_cb_search(work + LPC_ORDER, coefs, data, cba_idx, &cb1_idx, &cb2_idx);
380 380
     for (i = 0; i < BLOCKSIZE; i++) {