* qatar/master:
aacenc: Fix LONG_START windowing.
aacenc: Fix a bug where deinterleaved samples were stored in the wrong place.
avplay: use the correct array size for stride.
lavc: extend doxy for avcodec_alloc_context3().
APIchanges: mention avcodec_alloc_context()/2/3
avcodec_align_dimensions2: set only 4 linesizes, not AV_NUM_DATA_POINTERS.
aacsbr: ARM NEON optimised sbrdsp functions
aacsbr: align some arrays
aacsbr: move some simdable loops to function pointers
cosmetics: Remove extra newlines at EOF
Conflicts:
libavcodec/utils.c
libavfilter/formats.c
libavutil/mem.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
| ... | ... |
@@ -223,6 +223,8 @@ API changes, most recent first: |
| 223 | 223 |
|
| 224 | 224 |
2011-07-10 - 0b950fe - lavc 53.8.0 |
| 225 | 225 |
Add avcodec_open2(), deprecate avcodec_open(). |
| 226 |
+ Add avcodec_alloc_context3(). Deprecate avcodec_alloc_context() and |
|
| 227 |
+ avcodec_alloc_context2(). |
|
| 226 | 228 |
|
| 227 | 229 |
2011-07-01 - b442ca6 - lavf 53.5.0 - avformat.h |
| 228 | 230 |
Add function av_get_output_timestamp(). |
| ... | ... |
@@ -56,7 +56,8 @@ OBJS-$(CONFIG_VDPAU) += vdpau.o |
| 56 | 56 |
OBJS-$(CONFIG_A64MULTI_ENCODER) += a64multienc.o elbg.o |
| 57 | 57 |
OBJS-$(CONFIG_A64MULTI5_ENCODER) += a64multienc.o elbg.o |
| 58 | 58 |
OBJS-$(CONFIG_AAC_DECODER) += aacdec.o aactab.o aacsbr.o aacps.o \ |
| 59 |
- aacadtsdec.o mpeg4audio.o kbdwin.o |
|
| 59 |
+ aacadtsdec.o mpeg4audio.o kbdwin.o \ |
|
| 60 |
+ sbrdsp.o |
|
| 60 | 61 |
OBJS-$(CONFIG_AAC_ENCODER) += aacenc.o aaccoder.o \ |
| 61 | 62 |
aacpsy.o aactab.o \ |
| 62 | 63 |
psymodel.o iirfilter.o \ |
| ... | ... |
@@ -200,8 +200,8 @@ WINDOW_FUNC(long_start) |
| 200 | 200 |
float *out = sce->ret; |
| 201 | 201 |
|
| 202 | 202 |
dsp->vector_fmul(out, audio, lwindow, 1024); |
| 203 |
- memcpy(out + 1024, audio, sizeof(out[0]) * 448); |
|
| 204 |
- dsp->vector_fmul_reverse(out + 1024 + 448, audio, swindow, 128); |
|
| 203 |
+ memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448); |
|
| 204 |
+ dsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128); |
|
| 205 | 205 |
memset(out + 1024 + 576, 0, sizeof(out[0]) * 448); |
| 206 | 206 |
} |
| 207 | 207 |
|
| ... | ... |
@@ -487,10 +487,10 @@ static void deinterleave_input_samples(AACEncContext *s, |
| 487 | 487 |
const float *sptr = samples + channel_map[ch]; |
| 488 | 488 |
|
| 489 | 489 |
/* copy last 1024 samples of previous frame to the start of the current frame */ |
| 490 |
- memcpy(&s->planar_samples[ch][0], &s->planar_samples[ch][1024], 1024 * sizeof(s->planar_samples[0][0])); |
|
| 490 |
+ memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0])); |
|
| 491 | 491 |
|
| 492 | 492 |
/* deinterleave */ |
| 493 |
- for (i = 1024; i < 1024 * 2; i++) {
|
|
| 493 |
+ for (i = 2048; i < 3072; i++) {
|
|
| 494 | 494 |
s->planar_samples[ch][i] = *sptr; |
| 495 | 495 |
sptr += sinc; |
| 496 | 496 |
} |
| ... | ... |
@@ -32,6 +32,7 @@ |
| 32 | 32 |
#include "aacsbrdata.h" |
| 33 | 33 |
#include "fft.h" |
| 34 | 34 |
#include "aacps.h" |
| 35 |
+#include "sbrdsp.h" |
|
| 35 | 36 |
#include "libavutil/libm.h" |
| 36 | 37 |
#include "libavutil/avassert.h" |
| 37 | 38 |
|
| ... | ... |
@@ -144,6 +145,7 @@ av_cold void ff_aac_sbr_ctx_init(AACContext *ac, SpectralBandReplication *sbr) |
| 144 | 144 |
ff_mdct_init(&sbr->mdct, 7, 1, 1.0 / (64 * mdct_scale)); |
| 145 | 145 |
ff_mdct_init(&sbr->mdct_ana, 7, 1, -2.0 * mdct_scale); |
| 146 | 146 |
ff_ps_ctx_init(&sbr->ps); |
| 147 |
+ ff_sbrdsp_init(&sbr->dsp); |
|
| 147 | 148 |
} |
| 148 | 149 |
|
| 149 | 150 |
av_cold void ff_aac_sbr_ctx_close(SpectralBandReplication *sbr) |
| ... | ... |
@@ -1143,33 +1145,21 @@ static void sbr_dequant(SpectralBandReplication *sbr, int id_aac) |
| 1143 | 1143 |
* @param x pointer to the beginning of the first sample window |
| 1144 | 1144 |
* @param W array of complex-valued samples split into subbands |
| 1145 | 1145 |
*/ |
| 1146 |
-static void sbr_qmf_analysis(DSPContext *dsp, FFTContext *mdct, const float *in, float *x, |
|
| 1146 |
+static void sbr_qmf_analysis(DSPContext *dsp, FFTContext *mdct, |
|
| 1147 |
+ SBRDSPContext *sbrdsp, const float *in, float *x, |
|
| 1147 | 1148 |
float z[320], float W[2][32][32][2]) |
| 1148 | 1149 |
{
|
| 1149 |
- int i, k; |
|
| 1150 |
+ int i; |
|
| 1150 | 1151 |
memcpy(W[0], W[1], sizeof(W[0])); |
| 1151 | 1152 |
memcpy(x , x+1024, (320-32)*sizeof(x[0])); |
| 1152 | 1153 |
memcpy(x+288, in, 1024*sizeof(x[0])); |
| 1153 | 1154 |
for (i = 0; i < 32; i++) { // numTimeSlots*RATE = 16*2 as 960 sample frames
|
| 1154 | 1155 |
// are not supported |
| 1155 | 1156 |
dsp->vector_fmul_reverse(z, sbr_qmf_window_ds, x, 320); |
| 1156 |
- for (k = 0; k < 64; k++) {
|
|
| 1157 |
- float f = z[k] + z[k + 64] + z[k + 128] + z[k + 192] + z[k + 256]; |
|
| 1158 |
- z[k] = f; |
|
| 1159 |
- } |
|
| 1160 |
- //Shuffle to IMDCT |
|
| 1161 |
- z[64] = z[0]; |
|
| 1162 |
- for (k = 1; k < 32; k++) {
|
|
| 1163 |
- z[64+2*k-1] = z[ k]; |
|
| 1164 |
- z[64+2*k ] = -z[64-k]; |
|
| 1165 |
- } |
|
| 1166 |
- z[64+63] = z[32]; |
|
| 1167 |
- |
|
| 1157 |
+ sbrdsp->sum64x5(z); |
|
| 1158 |
+ sbrdsp->qmf_pre_shuffle(z); |
|
| 1168 | 1159 |
mdct->imdct_half(mdct, z, z+64); |
| 1169 |
- for (k = 0; k < 32; k++) {
|
|
| 1170 |
- W[1][i][k][0] = -z[63-k]; |
|
| 1171 |
- W[1][i][k][1] = z[k]; |
|
| 1172 |
- } |
|
| 1160 |
+ sbrdsp->qmf_post_shuffle(W[1][i], z); |
|
| 1173 | 1161 |
x += 32; |
| 1174 | 1162 |
} |
| 1175 | 1163 |
} |
| ... | ... |
@@ -1179,6 +1169,7 @@ static void sbr_qmf_analysis(DSPContext *dsp, FFTContext *mdct, const float *in, |
| 1179 | 1179 |
* (14496-3 sp04 p206) |
| 1180 | 1180 |
*/ |
| 1181 | 1181 |
static void sbr_qmf_synthesis(DSPContext *dsp, FFTContext *mdct, |
| 1182 |
+ SBRDSPContext *sbrdsp, |
|
| 1182 | 1183 |
float *out, float X[2][38][64], |
| 1183 | 1184 |
float mdct_buf[2][64], |
| 1184 | 1185 |
float *v0, int *v_off, const unsigned int div) |
| ... | ... |
@@ -1202,20 +1193,12 @@ static void sbr_qmf_synthesis(DSPContext *dsp, FFTContext *mdct, |
| 1202 | 1202 |
X[0][i][32+n] = X[1][i][31-n]; |
| 1203 | 1203 |
} |
| 1204 | 1204 |
mdct->imdct_half(mdct, mdct_buf[0], X[0][i]); |
| 1205 |
- for (n = 0; n < 32; n++) {
|
|
| 1206 |
- v[ n] = mdct_buf[0][63 - 2*n]; |
|
| 1207 |
- v[63 - n] = -mdct_buf[0][62 - 2*n]; |
|
| 1208 |
- } |
|
| 1205 |
+ sbrdsp->qmf_deint_neg(v, mdct_buf[0]); |
|
| 1209 | 1206 |
} else {
|
| 1210 |
- for (n = 1; n < 64; n+=2) {
|
|
| 1211 |
- X[1][i][n] = -X[1][i][n]; |
|
| 1212 |
- } |
|
| 1207 |
+ sbrdsp->neg_odd_64(X[1][i]); |
|
| 1213 | 1208 |
mdct->imdct_half(mdct, mdct_buf[0], X[0][i]); |
| 1214 | 1209 |
mdct->imdct_half(mdct, mdct_buf[1], X[1][i]); |
| 1215 |
- for (n = 0; n < 64; n++) {
|
|
| 1216 |
- v[ n] = -mdct_buf[0][63 - n] + mdct_buf[1][ n ]; |
|
| 1217 |
- v[127 - n] = mdct_buf[0][63 - n] + mdct_buf[1][ n ]; |
|
| 1218 |
- } |
|
| 1210 |
+ sbrdsp->qmf_deint_bfly(v, mdct_buf[1], mdct_buf[0]); |
|
| 1219 | 1211 |
} |
| 1220 | 1212 |
dsp->vector_fmul_add(out, v , sbr_qmf_window , zero64, 64 >> div); |
| 1221 | 1213 |
dsp->vector_fmul_add(out, v + ( 192 >> div), sbr_qmf_window + ( 64 >> div), out , 64 >> div); |
| ... | ... |
@@ -1231,45 +1214,20 @@ static void sbr_qmf_synthesis(DSPContext *dsp, FFTContext *mdct, |
| 1231 | 1231 |
} |
| 1232 | 1232 |
} |
| 1233 | 1233 |
|
| 1234 |
-static void autocorrelate(const float x[40][2], float phi[3][2][2], int lag) |
|
| 1235 |
-{
|
|
| 1236 |
- int i; |
|
| 1237 |
- float real_sum = 0.0f; |
|
| 1238 |
- float imag_sum = 0.0f; |
|
| 1239 |
- if (lag) {
|
|
| 1240 |
- for (i = 1; i < 38; i++) {
|
|
| 1241 |
- real_sum += x[i][0] * x[i+lag][0] + x[i][1] * x[i+lag][1]; |
|
| 1242 |
- imag_sum += x[i][0] * x[i+lag][1] - x[i][1] * x[i+lag][0]; |
|
| 1243 |
- } |
|
| 1244 |
- phi[2-lag][1][0] = real_sum + x[ 0][0] * x[lag][0] + x[ 0][1] * x[lag][1]; |
|
| 1245 |
- phi[2-lag][1][1] = imag_sum + x[ 0][0] * x[lag][1] - x[ 0][1] * x[lag][0]; |
|
| 1246 |
- if (lag == 1) {
|
|
| 1247 |
- phi[0][0][0] = real_sum + x[38][0] * x[39][0] + x[38][1] * x[39][1]; |
|
| 1248 |
- phi[0][0][1] = imag_sum + x[38][0] * x[39][1] - x[38][1] * x[39][0]; |
|
| 1249 |
- } |
|
| 1250 |
- } else {
|
|
| 1251 |
- for (i = 1; i < 38; i++) {
|
|
| 1252 |
- real_sum += x[i][0] * x[i][0] + x[i][1] * x[i][1]; |
|
| 1253 |
- } |
|
| 1254 |
- phi[2][1][0] = real_sum + x[ 0][0] * x[ 0][0] + x[ 0][1] * x[ 0][1]; |
|
| 1255 |
- phi[1][0][0] = real_sum + x[38][0] * x[38][0] + x[38][1] * x[38][1]; |
|
| 1256 |
- } |
|
| 1257 |
-} |
|
| 1258 |
- |
|
| 1259 | 1234 |
/** High Frequency Generation (14496-3 sp04 p214+) and Inverse Filtering |
| 1260 | 1235 |
* (14496-3 sp04 p214) |
| 1261 | 1236 |
* Warning: This routine does not seem numerically stable. |
| 1262 | 1237 |
*/ |
| 1263 |
-static void sbr_hf_inverse_filter(float (*alpha0)[2], float (*alpha1)[2], |
|
| 1238 |
+static void sbr_hf_inverse_filter(SBRDSPContext *dsp, |
|
| 1239 |
+ float (*alpha0)[2], float (*alpha1)[2], |
|
| 1264 | 1240 |
const float X_low[32][40][2], int k0) |
| 1265 | 1241 |
{
|
| 1266 | 1242 |
int k; |
| 1267 | 1243 |
for (k = 0; k < k0; k++) {
|
| 1268 |
- float phi[3][2][2], dk; |
|
| 1244 |
+ LOCAL_ALIGNED_16(float, phi, [3], [2][2]); |
|
| 1245 |
+ float dk; |
|
| 1269 | 1246 |
|
| 1270 |
- autocorrelate(X_low[k], phi, 0); |
|
| 1271 |
- autocorrelate(X_low[k], phi, 1); |
|
| 1272 |
- autocorrelate(X_low[k], phi, 2); |
|
| 1247 |
+ dsp->autocorrelate(X_low[k], phi); |
|
| 1273 | 1248 |
|
| 1274 | 1249 |
dk = phi[2][1][0] * phi[1][0][0] - |
| 1275 | 1250 |
(phi[1][1][0] * phi[1][1][0] + phi[1][1][1] * phi[1][1][1]) / 1.000001f; |
| ... | ... |
@@ -1365,12 +1323,11 @@ static int sbr_hf_gen(AACContext *ac, SpectralBandReplication *sbr, |
| 1365 | 1365 |
const float bw_array[5], const uint8_t *t_env, |
| 1366 | 1366 |
int bs_num_env) |
| 1367 | 1367 |
{
|
| 1368 |
- int i, j, x; |
|
| 1368 |
+ int j, x; |
|
| 1369 | 1369 |
int g = 0; |
| 1370 | 1370 |
int k = sbr->kx[1]; |
| 1371 | 1371 |
for (j = 0; j < sbr->num_patches; j++) {
|
| 1372 | 1372 |
for (x = 0; x < sbr->patch_num_subbands[j]; x++, k++) {
|
| 1373 |
- float alpha[4]; |
|
| 1374 | 1373 |
const int p = sbr->patch_start_subband[j] + x; |
| 1375 | 1374 |
while (g <= sbr->n_q && k >= sbr->f_tablenoise[g]) |
| 1376 | 1375 |
g++; |
| ... | ... |
@@ -1382,26 +1339,10 @@ static int sbr_hf_gen(AACContext *ac, SpectralBandReplication *sbr, |
| 1382 | 1382 |
return -1; |
| 1383 | 1383 |
} |
| 1384 | 1384 |
|
| 1385 |
- alpha[0] = alpha1[p][0] * bw_array[g] * bw_array[g]; |
|
| 1386 |
- alpha[1] = alpha1[p][1] * bw_array[g] * bw_array[g]; |
|
| 1387 |
- alpha[2] = alpha0[p][0] * bw_array[g]; |
|
| 1388 |
- alpha[3] = alpha0[p][1] * bw_array[g]; |
|
| 1389 |
- |
|
| 1390 |
- for (i = 2 * t_env[0]; i < 2 * t_env[bs_num_env]; i++) {
|
|
| 1391 |
- const int idx = i + ENVELOPE_ADJUSTMENT_OFFSET; |
|
| 1392 |
- X_high[k][idx][0] = |
|
| 1393 |
- X_low[p][idx - 2][0] * alpha[0] - |
|
| 1394 |
- X_low[p][idx - 2][1] * alpha[1] + |
|
| 1395 |
- X_low[p][idx - 1][0] * alpha[2] - |
|
| 1396 |
- X_low[p][idx - 1][1] * alpha[3] + |
|
| 1397 |
- X_low[p][idx][0]; |
|
| 1398 |
- X_high[k][idx][1] = |
|
| 1399 |
- X_low[p][idx - 2][1] * alpha[0] + |
|
| 1400 |
- X_low[p][idx - 2][0] * alpha[1] + |
|
| 1401 |
- X_low[p][idx - 1][1] * alpha[2] + |
|
| 1402 |
- X_low[p][idx - 1][0] * alpha[3] + |
|
| 1403 |
- X_low[p][idx][1]; |
|
| 1404 |
- } |
|
| 1385 |
+ sbr->dsp.hf_gen(X_high[k] + ENVELOPE_ADJUSTMENT_OFFSET, |
|
| 1386 |
+ X_low[p] + ENVELOPE_ADJUSTMENT_OFFSET, |
|
| 1387 |
+ alpha0[p], alpha1[p], bw_array[g], |
|
| 1388 |
+ 2 * t_env[0], 2 * t_env[bs_num_env]); |
|
| 1405 | 1389 |
} |
| 1406 | 1390 |
} |
| 1407 | 1391 |
if (k < sbr->m[1] + sbr->kx[1]) |
| ... | ... |
@@ -1502,7 +1443,8 @@ static void sbr_mapping(AACContext *ac, SpectralBandReplication *sbr, |
| 1502 | 1502 |
static void sbr_env_estimate(float (*e_curr)[48], float X_high[64][40][2], |
| 1503 | 1503 |
SpectralBandReplication *sbr, SBRData *ch_data) |
| 1504 | 1504 |
{
|
| 1505 |
- int e, i, m; |
|
| 1505 |
+ int e, m; |
|
| 1506 |
+ int kx1 = sbr->kx[1]; |
|
| 1506 | 1507 |
|
| 1507 | 1508 |
if (sbr->bs_interpol_freq) {
|
| 1508 | 1509 |
for (e = 0; e < ch_data->bs_num_env; e++) {
|
| ... | ... |
@@ -1511,12 +1453,7 @@ static void sbr_env_estimate(float (*e_curr)[48], float X_high[64][40][2], |
| 1511 | 1511 |
int iub = ch_data->t_env[e + 1] * 2 + ENVELOPE_ADJUSTMENT_OFFSET; |
| 1512 | 1512 |
|
| 1513 | 1513 |
for (m = 0; m < sbr->m[1]; m++) {
|
| 1514 |
- float sum = 0.0f; |
|
| 1515 |
- |
|
| 1516 |
- for (i = ilb; i < iub; i++) {
|
|
| 1517 |
- sum += X_high[m + sbr->kx[1]][i][0] * X_high[m + sbr->kx[1]][i][0] + |
|
| 1518 |
- X_high[m + sbr->kx[1]][i][1] * X_high[m + sbr->kx[1]][i][1]; |
|
| 1519 |
- } |
|
| 1514 |
+ float sum = sbr->dsp.sum_square(X_high[m+kx1] + ilb, iub - ilb); |
|
| 1520 | 1515 |
e_curr[e][m] = sum * recip_env_size; |
| 1521 | 1516 |
} |
| 1522 | 1517 |
} |
| ... | ... |
@@ -1534,14 +1471,11 @@ static void sbr_env_estimate(float (*e_curr)[48], float X_high[64][40][2], |
| 1534 | 1534 |
const int den = env_size * (table[p + 1] - table[p]); |
| 1535 | 1535 |
|
| 1536 | 1536 |
for (k = table[p]; k < table[p + 1]; k++) {
|
| 1537 |
- for (i = ilb; i < iub; i++) {
|
|
| 1538 |
- sum += X_high[k][i][0] * X_high[k][i][0] + |
|
| 1539 |
- X_high[k][i][1] * X_high[k][i][1]; |
|
| 1540 |
- } |
|
| 1537 |
+ sum += sbr->dsp.sum_square(X_high[k] + ilb, iub - ilb); |
|
| 1541 | 1538 |
} |
| 1542 | 1539 |
sum /= den; |
| 1543 | 1540 |
for (k = table[p]; k < table[p + 1]; k++) {
|
| 1544 |
- e_curr[e][k - sbr->kx[1]] = sum; |
|
| 1541 |
+ e_curr[e][k - kx1] = sum; |
|
| 1545 | 1542 |
} |
| 1546 | 1543 |
} |
| 1547 | 1544 |
} |
| ... | ... |
@@ -1652,55 +1586,35 @@ static void sbr_hf_assemble(float Y[2][38][64][2], const float X_high[64][40][2] |
| 1652 | 1652 |
for (e = 0; e < ch_data->bs_num_env; e++) {
|
| 1653 | 1653 |
for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
|
| 1654 | 1654 |
int phi_sign = (1 - 2*(kx & 1)); |
| 1655 |
+ LOCAL_ALIGNED_16(float, g_filt_tab, [48]); |
|
| 1656 |
+ LOCAL_ALIGNED_16(float, q_filt_tab, [48]); |
|
| 1657 |
+ float *g_filt, *q_filt; |
|
| 1655 | 1658 |
|
| 1656 | 1659 |
if (h_SL && e != e_a[0] && e != e_a[1]) {
|
| 1660 |
+ g_filt = g_filt_tab; |
|
| 1661 |
+ q_filt = q_filt_tab; |
|
| 1657 | 1662 |
for (m = 0; m < m_max; m++) {
|
| 1658 | 1663 |
const int idx1 = i + h_SL; |
| 1659 |
- float g_filt = 0.0f; |
|
| 1660 |
- for (j = 0; j <= h_SL; j++) |
|
| 1661 |
- g_filt += g_temp[idx1 - j][m] * h_smooth[j]; |
|
| 1662 |
- Y[1][i][m + kx][0] = |
|
| 1663 |
- X_high[m + kx][i + ENVELOPE_ADJUSTMENT_OFFSET][0] * g_filt; |
|
| 1664 |
- Y[1][i][m + kx][1] = |
|
| 1665 |
- X_high[m + kx][i + ENVELOPE_ADJUSTMENT_OFFSET][1] * g_filt; |
|
| 1664 |
+ g_filt[m] = 0.0f; |
|
| 1665 |
+ q_filt[m] = 0.0f; |
|
| 1666 |
+ for (j = 0; j <= h_SL; j++) {
|
|
| 1667 |
+ g_filt[m] += g_temp[idx1 - j][m] * h_smooth[j]; |
|
| 1668 |
+ q_filt[m] += q_temp[idx1 - j][m] * h_smooth[j]; |
|
| 1669 |
+ } |
|
| 1666 | 1670 |
} |
| 1667 | 1671 |
} else {
|
| 1668 |
- for (m = 0; m < m_max; m++) {
|
|
| 1669 |
- const float g_filt = g_temp[i + h_SL][m]; |
|
| 1670 |
- Y[1][i][m + kx][0] = |
|
| 1671 |
- X_high[m + kx][i + ENVELOPE_ADJUSTMENT_OFFSET][0] * g_filt; |
|
| 1672 |
- Y[1][i][m + kx][1] = |
|
| 1673 |
- X_high[m + kx][i + ENVELOPE_ADJUSTMENT_OFFSET][1] * g_filt; |
|
| 1674 |
- } |
|
| 1672 |
+ g_filt = g_temp[i + h_SL]; |
|
| 1673 |
+ q_filt = q_temp[i]; |
|
| 1675 | 1674 |
} |
| 1676 | 1675 |
|
| 1676 |
+ sbr->dsp.hf_g_filt(Y[1][i] + kx, X_high + kx, g_filt, m_max, |
|
| 1677 |
+ i + ENVELOPE_ADJUSTMENT_OFFSET); |
|
| 1678 |
+ |
|
| 1677 | 1679 |
if (e != e_a[0] && e != e_a[1]) {
|
| 1678 |
- for (m = 0; m < m_max; m++) {
|
|
| 1679 |
- indexnoise = (indexnoise + 1) & 0x1ff; |
|
| 1680 |
- if (sbr->s_m[e][m]) {
|
|
| 1681 |
- Y[1][i][m + kx][0] += |
|
| 1682 |
- sbr->s_m[e][m] * phi[0][indexsine]; |
|
| 1683 |
- Y[1][i][m + kx][1] += |
|
| 1684 |
- sbr->s_m[e][m] * (phi[1][indexsine] * phi_sign); |
|
| 1685 |
- } else {
|
|
| 1686 |
- float q_filt; |
|
| 1687 |
- if (h_SL) {
|
|
| 1688 |
- const int idx1 = i + h_SL; |
|
| 1689 |
- q_filt = 0.0f; |
|
| 1690 |
- for (j = 0; j <= h_SL; j++) |
|
| 1691 |
- q_filt += q_temp[idx1 - j][m] * h_smooth[j]; |
|
| 1692 |
- } else {
|
|
| 1693 |
- q_filt = q_temp[i][m]; |
|
| 1694 |
- } |
|
| 1695 |
- Y[1][i][m + kx][0] += |
|
| 1696 |
- q_filt * sbr_noise_table[indexnoise][0]; |
|
| 1697 |
- Y[1][i][m + kx][1] += |
|
| 1698 |
- q_filt * sbr_noise_table[indexnoise][1]; |
|
| 1699 |
- } |
|
| 1700 |
- phi_sign = -phi_sign; |
|
| 1701 |
- } |
|
| 1680 |
+ sbr->dsp.hf_apply_noise[indexsine](Y[1][i] + kx, sbr->s_m[e], |
|
| 1681 |
+ q_filt, indexnoise, |
|
| 1682 |
+ kx, m_max); |
|
| 1702 | 1683 |
} else {
|
| 1703 |
- indexnoise = (indexnoise + m_max) & 0x1ff; |
|
| 1704 | 1684 |
for (m = 0; m < m_max; m++) {
|
| 1705 | 1685 |
Y[1][i][m + kx][0] += |
| 1706 | 1686 |
sbr->s_m[e][m] * phi[0][indexsine]; |
| ... | ... |
@@ -1709,6 +1623,7 @@ static void sbr_hf_assemble(float Y[2][38][64][2], const float X_high[64][40][2] |
| 1709 | 1709 |
phi_sign = -phi_sign; |
| 1710 | 1710 |
} |
| 1711 | 1711 |
} |
| 1712 |
+ indexnoise = (indexnoise + m_max) & 0x1ff; |
|
| 1712 | 1713 |
indexsine = (indexsine + 1) & 3; |
| 1713 | 1714 |
} |
| 1714 | 1715 |
} |
| ... | ... |
@@ -1728,12 +1643,12 @@ void ff_sbr_apply(AACContext *ac, SpectralBandReplication *sbr, int id_aac, |
| 1728 | 1728 |
} |
| 1729 | 1729 |
for (ch = 0; ch < nch; ch++) {
|
| 1730 | 1730 |
/* decode channel */ |
| 1731 |
- sbr_qmf_analysis(&ac->dsp, &sbr->mdct_ana, ch ? R : L, sbr->data[ch].analysis_filterbank_samples, |
|
| 1731 |
+ sbr_qmf_analysis(&ac->dsp, &sbr->mdct_ana, &sbr->dsp, ch ? R : L, sbr->data[ch].analysis_filterbank_samples, |
|
| 1732 | 1732 |
(float*)sbr->qmf_filter_scratch, |
| 1733 | 1733 |
sbr->data[ch].W); |
| 1734 | 1734 |
sbr_lf_gen(ac, sbr, sbr->X_low, sbr->data[ch].W); |
| 1735 | 1735 |
if (sbr->start) {
|
| 1736 |
- sbr_hf_inverse_filter(sbr->alpha0, sbr->alpha1, sbr->X_low, sbr->k[0]); |
|
| 1736 |
+ sbr_hf_inverse_filter(&sbr->dsp, sbr->alpha0, sbr->alpha1, sbr->X_low, sbr->k[0]); |
|
| 1737 | 1737 |
sbr_chirp(sbr, &sbr->data[ch]); |
| 1738 | 1738 |
sbr_hf_gen(ac, sbr, sbr->X_high, sbr->X_low, sbr->alpha0, sbr->alpha1, |
| 1739 | 1739 |
sbr->data[ch].bw_array, sbr->data[ch].t_env, |
| ... | ... |
@@ -1760,12 +1675,12 @@ void ff_sbr_apply(AACContext *ac, SpectralBandReplication *sbr, int id_aac, |
| 1760 | 1760 |
nch = 2; |
| 1761 | 1761 |
} |
| 1762 | 1762 |
|
| 1763 |
- sbr_qmf_synthesis(&ac->dsp, &sbr->mdct, L, sbr->X[0], sbr->qmf_filter_scratch, |
|
| 1763 |
+ sbr_qmf_synthesis(&ac->dsp, &sbr->mdct, &sbr->dsp, L, sbr->X[0], sbr->qmf_filter_scratch, |
|
| 1764 | 1764 |
sbr->data[0].synthesis_filterbank_samples, |
| 1765 | 1765 |
&sbr->data[0].synthesis_filterbank_samples_offset, |
| 1766 | 1766 |
downsampled); |
| 1767 | 1767 |
if (nch == 2) |
| 1768 |
- sbr_qmf_synthesis(&ac->dsp, &sbr->mdct, R, sbr->X[1], sbr->qmf_filter_scratch, |
|
| 1768 |
+ sbr_qmf_synthesis(&ac->dsp, &sbr->mdct, &sbr->dsp, R, sbr->X[1], sbr->qmf_filter_scratch, |
|
| 1769 | 1769 |
sbr->data[1].synthesis_filterbank_samples, |
| 1770 | 1770 |
&sbr->data[1].synthesis_filterbank_samples_offset, |
| 1771 | 1771 |
downsampled); |
| ... | ... |
@@ -352,7 +352,8 @@ static DECLARE_ALIGNED(16, float, sbr_qmf_window_us)[640] = {
|
| 352 | 352 |
0.8537385600, |
| 353 | 353 |
}; |
| 354 | 354 |
|
| 355 |
-static const float sbr_noise_table[512][2] = {
|
|
| 355 |
+/* First two entries repeated at end to simplify SIMD implementations. */ |
|
| 356 |
+const DECLARE_ALIGNED(16, float, ff_sbr_noise_table)[][2] = {
|
|
| 356 | 357 |
{-0.99948153278296, -0.59483417516607}, { 0.97113454393991, -0.67528515225647},
|
| 357 | 358 |
{ 0.14130051758487, -0.95090983575689}, {-0.47005496701697, -0.37340549728647},
|
| 358 | 359 |
{ 0.80705063769351, 0.29653668284408}, {-0.38981478896926, 0.89572605717087},
|
| ... | ... |
@@ -609,6 +610,7 @@ static const float sbr_noise_table[512][2] = {
|
| 609 | 609 |
{-0.93412041758744, 0.41374052024363}, { 0.96063943315511, 0.93116709541280},
|
| 610 | 610 |
{ 0.97534253457837, 0.86150930812689}, { 0.99642466504163, 0.70190043427512},
|
| 611 | 611 |
{-0.94705089665984, -0.29580042814306}, { 0.91599807087376, -0.98147830385781},
|
| 612 |
+{-0.99948153278296, -0.59483417516607}, { 0.97113454393991, -0.67528515225647},
|
|
| 612 | 613 |
}; |
| 613 | 614 |
|
| 614 | 615 |
#endif /* AVCODEC_AACSBRDATA_H */ |
| ... | ... |
@@ -1,6 +1,8 @@ |
| 1 | 1 |
OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_init_arm.o \ |
| 2 | 2 |
arm/ac3dsp_arm.o |
| 3 | 3 |
|
| 4 |
+OBJS-$(CONFIG_AAC_DECODER) += arm/sbrdsp_init_arm.o |
|
| 5 |
+ |
|
| 4 | 6 |
OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_init_arm.o \ |
| 5 | 7 |
|
| 6 | 8 |
ARMV6-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_armv6.o |
| ... | ... |
@@ -61,6 +63,8 @@ NEON-OBJS-$(CONFIG_H264PRED) += arm/h264pred_neon.o \ |
| 61 | 61 |
|
| 62 | 62 |
NEON-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_neon.o |
| 63 | 63 |
|
| 64 |
+NEON-OBJS-$(CONFIG_AAC_DECODER) += arm/sbrdsp_neon.o |
|
| 65 |
+ |
|
| 64 | 66 |
NEON-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_neon.o \ |
| 65 | 67 |
arm/synth_filter_neon.o \ |
| 66 | 68 |
|
| 67 | 69 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,70 @@ |
| 0 |
+/* |
|
| 1 |
+ * Copyright (c) 2012 Mans Rullgard |
|
| 2 |
+ * |
|
| 3 |
+ * This file is part of Libav. |
|
| 4 |
+ * |
|
| 5 |
+ * Libav is free software; you can redistribute it and/or |
|
| 6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
| 7 |
+ * License as published by the Free Software Foundation; either |
|
| 8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
| 9 |
+ * |
|
| 10 |
+ * Libav is distributed in the hope that it will be useful, |
|
| 11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 13 |
+ * Lesser General Public License for more details. |
|
| 14 |
+ * |
|
| 15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
| 16 |
+ * License along with Libav; if not, write to the Free Software |
|
| 17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
| 18 |
+ */ |
|
| 19 |
+ |
|
| 20 |
+#include "config.h" |
|
| 21 |
+#include "libavutil/attributes.h" |
|
| 22 |
+#include "libavcodec/sbrdsp.h" |
|
| 23 |
+ |
|
| 24 |
+void ff_sbr_sum64x5_neon(float *z); |
|
| 25 |
+float ff_sbr_sum_square_neon(float (*x)[2], int n); |
|
| 26 |
+void ff_sbr_neg_odd_64_neon(float *x); |
|
| 27 |
+void ff_sbr_qmf_pre_shuffle_neon(float *z); |
|
| 28 |
+void ff_sbr_qmf_post_shuffle_neon(float W[32][2], const float *z); |
|
| 29 |
+void ff_sbr_qmf_deint_neg_neon(float *v, const float *src); |
|
| 30 |
+void ff_sbr_qmf_deint_bfly_neon(float *v, const float *src0, const float *src1); |
|
| 31 |
+void ff_sbr_hf_g_filt_neon(float (*Y)[2], const float (*X_high)[40][2], |
|
| 32 |
+ const float *g_filt, int m_max, int ixh); |
|
| 33 |
+void ff_sbr_hf_gen_neon(float (*X_high)[2], const float (*X_low)[2], |
|
| 34 |
+ const float alpha0[2], const float alpha1[2], |
|
| 35 |
+ float bw, int start, int end); |
|
| 36 |
+void ff_sbr_autocorrelate_neon(const float x[40][2], float phi[3][2][2]); |
|
| 37 |
+ |
|
| 38 |
+void ff_sbr_hf_apply_noise_0_neon(float Y[64][2], const float *s_m, |
|
| 39 |
+ const float *q_filt, int noise, |
|
| 40 |
+ int kx, int m_max); |
|
| 41 |
+void ff_sbr_hf_apply_noise_1_neon(float Y[64][2], const float *s_m, |
|
| 42 |
+ const float *q_filt, int noise, |
|
| 43 |
+ int kx, int m_max); |
|
| 44 |
+void ff_sbr_hf_apply_noise_2_neon(float Y[64][2], const float *s_m, |
|
| 45 |
+ const float *q_filt, int noise, |
|
| 46 |
+ int kx, int m_max); |
|
| 47 |
+void ff_sbr_hf_apply_noise_3_neon(float Y[64][2], const float *s_m, |
|
| 48 |
+ const float *q_filt, int noise, |
|
| 49 |
+ int kx, int m_max); |
|
| 50 |
+ |
|
| 51 |
+av_cold void ff_sbrdsp_init_arm(SBRDSPContext *s) |
|
| 52 |
+{
|
|
| 53 |
+ if (HAVE_NEON) {
|
|
| 54 |
+ s->sum64x5 = ff_sbr_sum64x5_neon; |
|
| 55 |
+ s->sum_square = ff_sbr_sum_square_neon; |
|
| 56 |
+ s->neg_odd_64 = ff_sbr_neg_odd_64_neon; |
|
| 57 |
+ s->qmf_pre_shuffle = ff_sbr_qmf_pre_shuffle_neon; |
|
| 58 |
+ s->qmf_post_shuffle = ff_sbr_qmf_post_shuffle_neon; |
|
| 59 |
+ s->qmf_deint_neg = ff_sbr_qmf_deint_neg_neon; |
|
| 60 |
+ s->qmf_deint_bfly = ff_sbr_qmf_deint_bfly_neon; |
|
| 61 |
+ s->hf_g_filt = ff_sbr_hf_g_filt_neon; |
|
| 62 |
+ s->hf_gen = ff_sbr_hf_gen_neon; |
|
| 63 |
+ s->autocorrelate = ff_sbr_autocorrelate_neon; |
|
| 64 |
+ s->hf_apply_noise[0] = ff_sbr_hf_apply_noise_0_neon; |
|
| 65 |
+ s->hf_apply_noise[1] = ff_sbr_hf_apply_noise_1_neon; |
|
| 66 |
+ s->hf_apply_noise[2] = ff_sbr_hf_apply_noise_2_neon; |
|
| 67 |
+ s->hf_apply_noise[3] = ff_sbr_hf_apply_noise_3_neon; |
|
| 68 |
+ } |
|
| 69 |
+} |
| 0 | 70 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,411 @@ |
| 0 |
+/* |
|
| 1 |
+ * Copyright (c) 2012 Mans Rullgard |
|
| 2 |
+ * |
|
| 3 |
+ * This file is part of Libav. |
|
| 4 |
+ * |
|
| 5 |
+ * Libav is free software; you can redistribute it and/or |
|
| 6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
| 7 |
+ * License as published by the Free Software Foundation; either |
|
| 8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
| 9 |
+ * |
|
| 10 |
+ * Libav is distributed in the hope that it will be useful, |
|
| 11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 13 |
+ * Lesser General Public License for more details. |
|
| 14 |
+ * |
|
| 15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
| 16 |
+ * License along with Libav; if not, write to the Free Software |
|
| 17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
| 18 |
+ */ |
|
| 19 |
+ |
|
| 20 |
+#include "asm.S" |
|
| 21 |
+ |
|
| 22 |
+function ff_sbr_sum64x5_neon, export=1 |
|
| 23 |
+ push {lr}
|
|
| 24 |
+ add r1, r0, # 64*4 |
|
| 25 |
+ add r2, r0, #128*4 |
|
| 26 |
+ add r3, r0, #192*4 |
|
| 27 |
+ add lr, r0, #256*4 |
|
| 28 |
+ mov r12, #64 |
|
| 29 |
+1: |
|
| 30 |
+ vld1.32 {q0}, [r0,:128]
|
|
| 31 |
+ vld1.32 {q1}, [r1,:128]!
|
|
| 32 |
+ vadd.f32 q0, q0, q1 |
|
| 33 |
+ vld1.32 {q2}, [r2,:128]!
|
|
| 34 |
+ vadd.f32 q0, q0, q2 |
|
| 35 |
+ vld1.32 {q3}, [r3,:128]!
|
|
| 36 |
+ vadd.f32 q0, q0, q3 |
|
| 37 |
+ vld1.32 {q8}, [lr,:128]!
|
|
| 38 |
+ vadd.f32 q0, q0, q8 |
|
| 39 |
+ vst1.32 {q0}, [r0,:128]!
|
|
| 40 |
+ subs r12, #4 |
|
| 41 |
+ bgt 1b |
|
| 42 |
+ pop {pc}
|
|
| 43 |
+endfunc |
|
| 44 |
+ |
|
| 45 |
+function ff_sbr_sum_square_neon, export=1 |
|
| 46 |
+ vmov.f32 q0, #0.0 |
|
| 47 |
+1: |
|
| 48 |
+ vld1.32 {q1}, [r0,:128]!
|
|
| 49 |
+ vmla.f32 q0, q1, q1 |
|
| 50 |
+ subs r1, r1, #2 |
|
| 51 |
+ bgt 1b |
|
| 52 |
+ vadd.f32 d0, d0, d1 |
|
| 53 |
+ vpadd.f32 d0, d0, d0 |
|
| 54 |
+NOVFP vmov.32 r0, d0[0] |
|
| 55 |
+ bx lr |
|
| 56 |
+endfunc |
|
| 57 |
+ |
|
| 58 |
+function ff_sbr_neg_odd_64_neon, export=1 |
|
| 59 |
+ mov r1, r0 |
|
| 60 |
+ vmov.i32 q8, #1<<31 |
|
| 61 |
+ vld2.32 {q0,q1}, [r0,:128]!
|
|
| 62 |
+ veor q1, q1, q8 |
|
| 63 |
+ vld2.32 {q2,q3}, [r0,:128]!
|
|
| 64 |
+ .rept 3 |
|
| 65 |
+ vst2.32 {q0,q1}, [r1,:128]!
|
|
| 66 |
+ veor q3, q3, q8 |
|
| 67 |
+ vld2.32 {q0,q1}, [r0,:128]!
|
|
| 68 |
+ vst2.32 {q2,q3}, [r1,:128]!
|
|
| 69 |
+ veor q1, q1, q8 |
|
| 70 |
+ vld2.32 {q2,q3}, [r0,:128]!
|
|
| 71 |
+ .endr |
|
| 72 |
+ veor q3, q3, q8 |
|
| 73 |
+ vst2.32 {q0,q1}, [r1,:128]!
|
|
| 74 |
+ vst2.32 {q2,q3}, [r1,:128]!
|
|
| 75 |
+ bx lr |
|
| 76 |
+endfunc |
|
| 77 |
+ |
|
| 78 |
+function ff_sbr_qmf_pre_shuffle_neon, export=1 |
|
| 79 |
+ add r1, r0, #60*4 |
|
| 80 |
+ add r2, r0, #64*4 |
|
| 81 |
+ vld1.32 {d0}, [r0,:64]!
|
|
| 82 |
+ vst1.32 {d0}, [r2,:64]!
|
|
| 83 |
+ mov r3, #-16 |
|
| 84 |
+ mov r12, #24 |
|
| 85 |
+ vmov.i32 q8, #1<<31 |
|
| 86 |
+ vld1.32 {q0}, [r1,:128], r3
|
|
| 87 |
+ vld1.32 {d2}, [r0,:64]!
|
|
| 88 |
+1: |
|
| 89 |
+ vld1.32 {d3,d4}, [r0,:128]!
|
|
| 90 |
+ vrev64.32 q0, q0 |
|
| 91 |
+ vld1.32 {q9}, [r1,:128], r3
|
|
| 92 |
+ veor q0, q0, q8 |
|
| 93 |
+ vld1.32 {d5,d6}, [r0,:128]!
|
|
| 94 |
+ vswp d0, d1 |
|
| 95 |
+ vrev64.32 q9, q9 |
|
| 96 |
+ vst2.32 {q0,q1}, [r2,:64]!
|
|
| 97 |
+ vmov q10, q2 |
|
| 98 |
+ veor q9, q9, q8 |
|
| 99 |
+ vmov d2, d6 |
|
| 100 |
+ vswp d18, d19 |
|
| 101 |
+ vld1.32 {q0}, [r1,:128], r3
|
|
| 102 |
+ vst2.32 {q9,q10}, [r2,:64]!
|
|
| 103 |
+ subs r12, r12, #8 |
|
| 104 |
+ bgt 1b |
|
| 105 |
+ vld1.32 {d3,d4}, [r0,:128]!
|
|
| 106 |
+ vrev64.32 q0, q0 |
|
| 107 |
+ vld1.32 {q9}, [r1,:128], r3
|
|
| 108 |
+ veor q0, q0, q8 |
|
| 109 |
+ vld1.32 {d5}, [r0,:64]!
|
|
| 110 |
+ vswp d0, d1 |
|
| 111 |
+ vrev64.32 q9, q9 |
|
| 112 |
+ vst2.32 {q0,q1}, [r2,:64]!
|
|
| 113 |
+ vswp d4, d5 |
|
| 114 |
+ veor q1, q9, q8 |
|
| 115 |
+ vst2.32 {d3,d5}, [r2,:64]!
|
|
| 116 |
+ vst2.32 {d2[0],d4[0]}, [r2,:64]!
|
|
| 117 |
+ bx lr |
|
| 118 |
+endfunc |
|
| 119 |
+ |
|
| 120 |
+function ff_sbr_qmf_post_shuffle_neon, export=1 |
|
| 121 |
+ add r2, r1, #60*4 |
|
| 122 |
+ mov r3, #-16 |
|
| 123 |
+ mov r12, #32 |
|
| 124 |
+ vmov.i32 q8, #1<<31 |
|
| 125 |
+ vld1.32 {q0}, [r2,:128], r3
|
|
| 126 |
+ vld1.32 {q1}, [r1,:128]!
|
|
| 127 |
+1: |
|
| 128 |
+ pld [r2, #-32] |
|
| 129 |
+ vrev64.32 q0, q0 |
|
| 130 |
+ vswp d2, d3 |
|
| 131 |
+ veor q0, q0, q8 |
|
| 132 |
+ vld1.32 {q2}, [r2,:128], r3
|
|
| 133 |
+ vld1.32 {q3}, [r1,:128]!
|
|
| 134 |
+ vst2.32 {d1,d3}, [r0,:128]!
|
|
| 135 |
+ vst2.32 {d0,d2}, [r0,:128]!
|
|
| 136 |
+ pld [r2, #-32] |
|
| 137 |
+ vrev64.32 q2, q2 |
|
| 138 |
+ vswp d6, d7 |
|
| 139 |
+ veor q2, q2, q8 |
|
| 140 |
+ vld1.32 {q0}, [r2,:128], r3
|
|
| 141 |
+ vld1.32 {q1}, [r1,:128]!
|
|
| 142 |
+ vst2.32 {d5,d7}, [r0,:128]!
|
|
| 143 |
+ vst2.32 {d4,d6}, [r0,:128]!
|
|
| 144 |
+ subs r12, r12, #8 |
|
| 145 |
+ bgt 1b |
|
| 146 |
+ bx lr |
|
| 147 |
+endfunc |
|
| 148 |
+ |
|
| 149 |
+function ff_sbr_qmf_deint_neg_neon, export=1 |
|
| 150 |
+ add r1, r1, #60*4 |
|
| 151 |
+ add r2, r0, #62*4 |
|
| 152 |
+ mov r3, #-16 |
|
| 153 |
+ mov r12, #32 |
|
| 154 |
+ vmov.i32 d2, #1<<31 |
|
| 155 |
+1: |
|
| 156 |
+ vld2.32 {d0,d1}, [r1,:128], r3
|
|
| 157 |
+ veor d0, d0, d2 |
|
| 158 |
+ vrev64.32 d1, d1 |
|
| 159 |
+ vst1.32 {d0}, [r2,:64]
|
|
| 160 |
+ vst1.32 {d1}, [r0,:64]!
|
|
| 161 |
+ sub r2, r2, #8 |
|
| 162 |
+ subs r12, r12, #2 |
|
| 163 |
+ bgt 1b |
|
| 164 |
+ bx lr |
|
| 165 |
+endfunc |
|
| 166 |
+ |
|
| 167 |
+function ff_sbr_qmf_deint_bfly_neon, export=1 |
|
| 168 |
+ push {lr}
|
|
| 169 |
+ add r2, r2, #60*4 |
|
| 170 |
+ add r3, r0, #124*4 |
|
| 171 |
+ mov r12, #64 |
|
| 172 |
+ mov lr, #-16 |
|
| 173 |
+1: |
|
| 174 |
+ vld1.32 {q0}, [r1,:128]!
|
|
| 175 |
+ vld1.32 {q1}, [r2,:128], lr
|
|
| 176 |
+ vrev64.32 q2, q0 |
|
| 177 |
+ vrev64.32 q3, q1 |
|
| 178 |
+ vadd.f32 d3, d4, d3 |
|
| 179 |
+ vadd.f32 d2, d5, d2 |
|
| 180 |
+ vsub.f32 d0, d0, d7 |
|
| 181 |
+ vsub.f32 d1, d1, d6 |
|
| 182 |
+ vst1.32 {q1}, [r3,:128], lr
|
|
| 183 |
+ vst1.32 {q0}, [r0,:128]!
|
|
| 184 |
+ subs r12, r12, #4 |
|
| 185 |
+ bgt 1b |
|
| 186 |
+ pop {pc}
|
|
| 187 |
+endfunc |
|
| 188 |
+ |
|
| 189 |
+function ff_sbr_hf_g_filt_neon, export=1 |
|
| 190 |
+ ldr r12, [sp] |
|
| 191 |
+ add r1, r1, r12, lsl #3 |
|
| 192 |
+ mov r12, #40*2*4 |
|
| 193 |
+ sub r3, r3, #1 |
|
| 194 |
+ vld2.32 {d2[],d3[]},[r2,:64]!
|
|
| 195 |
+ vld1.32 {d0}, [r1,:64], r12
|
|
| 196 |
+1: |
|
| 197 |
+ vld1.32 {d1}, [r1,:64], r12
|
|
| 198 |
+ vmul.f32 q3, q0, q1 |
|
| 199 |
+ vld2.32 {d2[],d3[]},[r2,:64]!
|
|
| 200 |
+ vld1.32 {d0}, [r1,:64], r12
|
|
| 201 |
+ vst1.32 {q3}, [r0,:64]!
|
|
| 202 |
+ subs r3, r3, #2 |
|
| 203 |
+ bgt 1b |
|
| 204 |
+ it lt |
|
| 205 |
+ bxlt lr |
|
| 206 |
+ vmul.f32 d0, d0, d2 |
|
| 207 |
+ vst1.32 {d0}, [r0,:64]!
|
|
| 208 |
+ bx lr |
|
| 209 |
+endfunc |
|
| 210 |
+ |
|
| 211 |
+function ff_sbr_hf_gen_neon, export=1 |
|
| 212 |
+NOVFP vld1.32 {d1[]}, [sp,:32]
|
|
| 213 |
+VFP vdup.32 d1, d0[0] |
|
| 214 |
+ vmul.f32 d0, d1, d1 |
|
| 215 |
+ vld1.32 {d3}, [r2,:64]
|
|
| 216 |
+ vld1.32 {d2}, [r3,:64]
|
|
| 217 |
+ vmul.f32 q0, q0, q1 |
|
| 218 |
+ ldrd r2, r3, [sp, #4*!HAVE_VFP_ARGS] |
|
| 219 |
+ vtrn.32 d0, d1 |
|
| 220 |
+ vneg.f32 d18, d1 |
|
| 221 |
+ vtrn.32 d18, d1 |
|
| 222 |
+ add r0, r0, r2, lsl #3 |
|
| 223 |
+ add r1, r1, r2, lsl #3 |
|
| 224 |
+ sub r1, r1, #2*8 |
|
| 225 |
+ sub r3, r3, r2 |
|
| 226 |
+ vld1.32 {q1}, [r1,:128]!
|
|
| 227 |
+1: |
|
| 228 |
+ vld1.32 {q3}, [r1,:128]!
|
|
| 229 |
+ vrev64.32 q2, q1 |
|
| 230 |
+ vmov q8, q3 |
|
| 231 |
+ vrev64.32 d20, d3 |
|
| 232 |
+ vrev64.32 d21, d6 |
|
| 233 |
+ vmla.f32 q3, q1, d0[0] |
|
| 234 |
+ vmla.f32 d6, d4, d18 |
|
| 235 |
+ vmla.f32 d7, d20, d18 |
|
| 236 |
+ vmla.f32 d6, d3, d0[1] |
|
| 237 |
+ vmla.f32 d7, d16, d0[1] |
|
| 238 |
+ vmla.f32 d6, d5, d1 |
|
| 239 |
+ vmla.f32 d7, d21, d1 |
|
| 240 |
+ vmov q1, q8 |
|
| 241 |
+ vst1.32 {q3}, [r0,:128]!
|
|
| 242 |
+ subs r3, r3, #2 |
|
| 243 |
+ bgt 1b |
|
| 244 |
+ bx lr |
|
| 245 |
+endfunc |
|
| 246 |
+ |
|
| 247 |
+function ff_sbr_autocorrelate_neon, export=1 |
|
| 248 |
+ vld1.32 {q0}, [r0,:128]!
|
|
| 249 |
+ vmov.f32 q1, #0.0 |
|
| 250 |
+ vmov.f32 q3, #0.0 |
|
| 251 |
+ vmov.f32 d20, #0.0 |
|
| 252 |
+ vmul.f32 d21, d1, d1 |
|
| 253 |
+ vmov q8, q0 |
|
| 254 |
+ vmov q11, q0 |
|
| 255 |
+ mov r12, #36 |
|
| 256 |
+1: |
|
| 257 |
+ vld1.32 {q2}, [r0,:128]!
|
|
| 258 |
+ vrev64.32 q12, q2 |
|
| 259 |
+ vmla.f32 q10, q2, q2 |
|
| 260 |
+ vmla.f32 d2, d1, d4 |
|
| 261 |
+ vmla.f32 d3, d1, d24 |
|
| 262 |
+ vmla.f32 d6, d0, d4 |
|
| 263 |
+ vmla.f32 d7, d0, d24 |
|
| 264 |
+ vmla.f32 d2, d4, d5 |
|
| 265 |
+ vmla.f32 d3, d4, d25 |
|
| 266 |
+ vmla.f32 d6, d1, d5 |
|
| 267 |
+ vmla.f32 d7, d1, d25 |
|
| 268 |
+ vmov q0, q2 |
|
| 269 |
+ subs r12, r12, #2 |
|
| 270 |
+ bgt 1b |
|
| 271 |
+ vld1.32 {q2}, [r0,:128]!
|
|
| 272 |
+ vrev64.32 q12, q2 |
|
| 273 |
+ vmla.f32 d2, d1, d4 |
|
| 274 |
+ vmla.f32 d3, d1, d24 |
|
| 275 |
+ vmla.f32 d6, d0, d4 |
|
| 276 |
+ vmla.f32 d7, d0, d24 |
|
| 277 |
+ vadd.f32 d20, d20, d21 |
|
| 278 |
+ vrev64.32 d18, d17 |
|
| 279 |
+ vmla.f32 d6, d1, d5 |
|
| 280 |
+ vmla.f32 d7, d1, d25 |
|
| 281 |
+ vmov q0, q1 |
|
| 282 |
+ vmla.f32 d0, d16, d17 |
|
| 283 |
+ vmla.f32 d1, d16, d18 |
|
| 284 |
+ vmla.f32 d2, d4, d5 |
|
| 285 |
+ vmla.f32 d3, d4, d25 |
|
| 286 |
+ vneg.f32 s15, s15 |
|
| 287 |
+ vmov d21, d20 |
|
| 288 |
+ vpadd.f32 d0, d0, d2 |
|
| 289 |
+ vpadd.f32 d7, d6, d7 |
|
| 290 |
+ vtrn.32 d1, d3 |
|
| 291 |
+ vsub.f32 d6, d1, d3 |
|
| 292 |
+ vmla.f32 d20, d22, d22 |
|
| 293 |
+ vmla.f32 d21, d4, d4 |
|
| 294 |
+ vtrn.32 d0, d6 |
|
| 295 |
+ vpadd.f32 d20, d20, d21 |
|
| 296 |
+ vst1.32 {q3}, [r1,:128]!
|
|
| 297 |
+ vst1.32 {d20[1]}, [r1,:32]
|
|
| 298 |
+ add r1, r1, #2*4 |
|
| 299 |
+ vst1.32 {d0}, [r1,:64]
|
|
| 300 |
+ add r1, r1, #4*4 |
|
| 301 |
+ vst1.32 {d20[0]}, [r1,:32]
|
|
| 302 |
+ bx lr |
|
| 303 |
+endfunc |
|
| 304 |
+ |
|
| 305 |
+function ff_sbr_hf_apply_noise_0_neon, export=1 |
|
| 306 |
+ vmov.i32 d3, #0 |
|
| 307 |
+.Lhf_apply_noise_0: |
|
| 308 |
+ push {r4,lr}
|
|
| 309 |
+ ldr r12, [sp, #12] |
|
| 310 |
+ movrel r4, X(ff_sbr_noise_table) |
|
| 311 |
+ add r3, r3, #1 |
|
| 312 |
+ bfc r3, #9, #23 |
|
| 313 |
+ sub r12, r12, #1 |
|
| 314 |
+1: |
|
| 315 |
+ add lr, r4, r3, lsl #3 |
|
| 316 |
+ vld2.32 {q0}, [r0,:64]
|
|
| 317 |
+ vld2.32 {q3}, [lr,:64]
|
|
| 318 |
+ vld1.32 {d2}, [r1,:64]!
|
|
| 319 |
+ vld1.32 {d18}, [r2,:64]!
|
|
| 320 |
+ vceq.f32 d16, d2, #0 |
|
| 321 |
+ veor d2, d2, d3 |
|
| 322 |
+ vmov q2, q0 |
|
| 323 |
+ vmla.f32 d0, d6, d18 |
|
| 324 |
+ vmla.f32 d1, d7, d18 |
|
| 325 |
+ vadd.f32 d4, d4, d2 |
|
| 326 |
+ add r3, r3, #2 |
|
| 327 |
+ bfc r3, #9, #23 |
|
| 328 |
+ vbif d0, d4, d16 |
|
| 329 |
+ vbif d1, d5, d16 |
|
| 330 |
+ vst2.32 {q0}, [r0,:64]!
|
|
| 331 |
+ subs r12, r12, #2 |
|
| 332 |
+ bgt 1b |
|
| 333 |
+ blt 2f |
|
| 334 |
+ add lr, r4, r3, lsl #3 |
|
| 335 |
+ vld1.32 {d0}, [r0,:64]
|
|
| 336 |
+ vld1.32 {d6}, [lr,:64]
|
|
| 337 |
+ vld1.32 {d2[]}, [r1,:32]!
|
|
| 338 |
+ vld1.32 {d3[]}, [r2,:32]!
|
|
| 339 |
+ vceq.f32 d4, d2, #0 |
|
| 340 |
+ veor d2, d2, d3 |
|
| 341 |
+ vmov d1, d0 |
|
| 342 |
+ vmla.f32 d0, d6, d3 |
|
| 343 |
+ vadd.f32 s2, s2, s4 |
|
| 344 |
+ vbif d0, d1, d4 |
|
| 345 |
+ vst1.32 {d0}, [r0,:64]!
|
|
| 346 |
+2: |
|
| 347 |
+ pop {r4,pc}
|
|
| 348 |
+endfunc |
|
| 349 |
+ |
|
| 350 |
+function ff_sbr_hf_apply_noise_1_neon, export=1 |
|
| 351 |
+ ldr r12, [sp] |
|
| 352 |
+ push {r4,lr}
|
|
| 353 |
+ lsl r12, r12, #31 |
|
| 354 |
+ eor lr, r12, #1<<31 |
|
| 355 |
+ vmov d3, r12, lr |
|
| 356 |
+.Lhf_apply_noise_1: |
|
| 357 |
+ ldr r12, [sp, #12] |
|
| 358 |
+ movrel r4, X(ff_sbr_noise_table) |
|
| 359 |
+ add r3, r3, #1 |
|
| 360 |
+ bfc r3, #9, #23 |
|
| 361 |
+ sub r12, r12, #1 |
|
| 362 |
+1: |
|
| 363 |
+ add lr, r4, r3, lsl #3 |
|
| 364 |
+ vld2.32 {q0}, [r0,:64]
|
|
| 365 |
+ vld2.32 {q3}, [lr,:64]
|
|
| 366 |
+ vld1.32 {d2}, [r1,:64]!
|
|
| 367 |
+ vld1.32 {d18}, [r2,:64]!
|
|
| 368 |
+ vceq.f32 d16, d2, #0 |
|
| 369 |
+ veor d2, d2, d3 |
|
| 370 |
+ vmov q2, q0 |
|
| 371 |
+ vmla.f32 d0, d6, d18 |
|
| 372 |
+ vmla.f32 d1, d7, d18 |
|
| 373 |
+ vadd.f32 d5, d5, d2 |
|
| 374 |
+ add r3, r3, #2 |
|
| 375 |
+ bfc r3, #9, #23 |
|
| 376 |
+ vbif d0, d4, d16 |
|
| 377 |
+ vbif d1, d5, d16 |
|
| 378 |
+ vst2.32 {q0}, [r0,:64]!
|
|
| 379 |
+ subs r12, r12, #2 |
|
| 380 |
+ bgt 1b |
|
| 381 |
+ blt 2f |
|
| 382 |
+ add lr, r4, r3, lsl #3 |
|
| 383 |
+ vld1.32 {d0}, [r0,:64]
|
|
| 384 |
+ vld1.32 {d6}, [lr,:64]
|
|
| 385 |
+ vld1.32 {d2[]}, [r1,:32]!
|
|
| 386 |
+ vld1.32 {d18[]}, [r2,:32]!
|
|
| 387 |
+ vceq.f32 d4, d2, #0 |
|
| 388 |
+ veor d2, d2, d3 |
|
| 389 |
+ vmov d1, d0 |
|
| 390 |
+ vmla.f32 d0, d6, d18 |
|
| 391 |
+ vadd.f32 s3, s3, s5 |
|
| 392 |
+ vbif d0, d1, d4 |
|
| 393 |
+ vst1.32 {d0}, [r0,:64]!
|
|
| 394 |
+2: |
|
| 395 |
+ pop {r4,pc}
|
|
| 396 |
+endfunc |
|
| 397 |
+ |
|
| 398 |
+function ff_sbr_hf_apply_noise_2_neon, export=1 |
|
| 399 |
+ vmov.i32 d3, #1<<31 |
|
| 400 |
+ b .Lhf_apply_noise_0 |
|
| 401 |
+endfunc |
|
| 402 |
+ |
|
| 403 |
+function ff_sbr_hf_apply_noise_3_neon, export=1 |
|
| 404 |
+ ldr r12, [sp] |
|
| 405 |
+ push {r4,lr}
|
|
| 406 |
+ lsl r12, r12, #31 |
|
| 407 |
+ eor lr, r12, #1<<31 |
|
| 408 |
+ vmov d3, lr, r12 |
|
| 409 |
+ b .Lhf_apply_noise_1 |
|
| 410 |
+endfunc |
| ... | ... |
@@ -3566,6 +3566,9 @@ AVCodecContext *avcodec_alloc_context2(enum AVMediaType); |
| 3566 | 3566 |
* @param codec if non-NULL, allocate private data and initialize defaults |
| 3567 | 3567 |
* for the given codec. It is illegal to then call avcodec_open2() |
| 3568 | 3568 |
* with a different codec. |
| 3569 |
+ * If NULL, then the codec-specific defaults won't be initialized, |
|
| 3570 |
+ * which may result in suboptimal default settings (this is |
|
| 3571 |
+ * important mainly for encoders, e.g. libx264). |
|
| 3569 | 3572 |
* |
| 3570 | 3573 |
* @return An AVCodecContext filled with default values or NULL on failure. |
| 3571 | 3574 |
* @see avcodec_get_context_defaults |
| ... | ... |
@@ -32,6 +32,7 @@ |
| 32 | 32 |
#include <stdint.h> |
| 33 | 33 |
#include "fft.h" |
| 34 | 34 |
#include "aacps.h" |
| 35 |
+#include "sbrdsp.h" |
|
| 35 | 36 |
|
| 36 | 37 |
/** |
| 37 | 38 |
* Spectral Band Replication header - spectrum parameters that invoke a reset if they differ from the previous header. |
| ... | ... |
@@ -87,8 +88,8 @@ typedef struct {
|
| 87 | 87 |
///QMF values of the original signal |
| 88 | 88 |
float W[2][32][32][2]; |
| 89 | 89 |
///QMF output of the HF adjustor |
| 90 |
- float Y[2][38][64][2]; |
|
| 91 |
- float g_temp[42][48]; |
|
| 90 |
+ DECLARE_ALIGNED(16, float, Y)[2][38][64][2]; |
|
| 91 |
+ DECLARE_ALIGNED(16, float, g_temp)[42][48]; |
|
| 92 | 92 |
float q_temp[42][48]; |
| 93 | 93 |
uint8_t s_indexmapped[8][48]; |
| 94 | 94 |
///Envelope scalefactors |
| ... | ... |
@@ -155,15 +156,15 @@ typedef struct {
|
| 155 | 155 |
uint8_t patch_num_subbands[6]; |
| 156 | 156 |
uint8_t patch_start_subband[6]; |
| 157 | 157 |
///QMF low frequency input to the HF generator |
| 158 |
- float X_low[32][40][2]; |
|
| 158 |
+ DECLARE_ALIGNED(16, float, X_low)[32][40][2]; |
|
| 159 | 159 |
///QMF output of the HF generator |
| 160 |
- float X_high[64][40][2]; |
|
| 160 |
+ DECLARE_ALIGNED(16, float, X_high)[64][40][2]; |
|
| 161 | 161 |
///QMF values of the reconstructed signal |
| 162 | 162 |
DECLARE_ALIGNED(16, float, X)[2][2][38][64]; |
| 163 | 163 |
///Zeroth coefficient used to filter the subband signals |
| 164 |
- float alpha0[64][2]; |
|
| 164 |
+ DECLARE_ALIGNED(16, float, alpha0)[64][2]; |
|
| 165 | 165 |
///First coefficient used to filter the subband signals |
| 166 |
- float alpha1[64][2]; |
|
| 166 |
+ DECLARE_ALIGNED(16, float, alpha1)[64][2]; |
|
| 167 | 167 |
///Dequantized envelope scalefactors, remapped |
| 168 | 168 |
float e_origmapped[7][48]; |
| 169 | 169 |
///Dequantized noise scalefactors, remapped |
| ... | ... |
@@ -180,6 +181,7 @@ typedef struct {
|
| 180 | 180 |
DECLARE_ALIGNED(16, float, qmf_filter_scratch)[5][64]; |
| 181 | 181 |
FFTContext mdct_ana; |
| 182 | 182 |
FFTContext mdct; |
| 183 |
+ SBRDSPContext dsp; |
|
| 183 | 184 |
} SpectralBandReplication; |
| 184 | 185 |
|
| 185 | 186 |
#endif /* AVCODEC_SBR_H */ |
| 186 | 187 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,241 @@ |
| 0 |
+/* |
|
| 1 |
+ * AAC Spectral Band Replication decoding functions |
|
| 2 |
+ * Copyright (c) 2008-2009 Robert Swain ( rob opendot cl ) |
|
| 3 |
+ * Copyright (c) 2009-2010 Alex Converse <alex.converse@gmail.com> |
|
| 4 |
+ * |
|
| 5 |
+ * This file is part of Libav. |
|
| 6 |
+ * |
|
| 7 |
+ * Libav is free software; you can redistribute it and/or |
|
| 8 |
+ * modify it under the terms of the GNU Lesser General Public |
|
| 9 |
+ * License as published by the Free Software Foundation; either |
|
| 10 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
| 11 |
+ * |
|
| 12 |
+ * Libav is distributed in the hope that it will be useful, |
|
| 13 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 14 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 15 |
+ * Lesser General Public License for more details. |
|
| 16 |
+ * |
|
| 17 |
+ * You should have received a copy of the GNU Lesser General Public |
|
| 18 |
+ * License along with Libav; if not, write to the Free Software |
|
| 19 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
| 20 |
+ */ |
|
| 21 |
+ |
|
| 22 |
+#include "config.h" |
|
| 23 |
+#include "libavutil/attributes.h" |
|
| 24 |
+#include "sbrdsp.h" |
|
| 25 |
+ |
|
| 26 |
+static void sbr_sum64x5_c(float *z) |
|
| 27 |
+{
|
|
| 28 |
+ int k; |
|
| 29 |
+ for (k = 0; k < 64; k++) {
|
|
| 30 |
+ float f = z[k] + z[k + 64] + z[k + 128] + z[k + 192] + z[k + 256]; |
|
| 31 |
+ z[k] = f; |
|
| 32 |
+ } |
|
| 33 |
+} |
|
| 34 |
+ |
|
| 35 |
+static float sbr_sum_square_c(float (*x)[2], int n) |
|
| 36 |
+{
|
|
| 37 |
+ float sum = 0.0f; |
|
| 38 |
+ int i; |
|
| 39 |
+ |
|
| 40 |
+ for (i = 0; i < n; i++) |
|
| 41 |
+ sum += x[i][0] * x[i][0] + x[i][1] * x[i][1]; |
|
| 42 |
+ |
|
| 43 |
+ return sum; |
|
| 44 |
+} |
|
| 45 |
+ |
|
| 46 |
+static void sbr_neg_odd_64_c(float *x) |
|
| 47 |
+{
|
|
| 48 |
+ int i; |
|
| 49 |
+ for (i = 1; i < 64; i += 2) |
|
| 50 |
+ x[i] = -x[i]; |
|
| 51 |
+} |
|
| 52 |
+ |
|
| 53 |
+static void sbr_qmf_pre_shuffle_c(float *z) |
|
| 54 |
+{
|
|
| 55 |
+ int k; |
|
| 56 |
+ z[64] = z[0]; |
|
| 57 |
+ z[65] = z[1]; |
|
| 58 |
+ for (k = 1; k < 32; k++) {
|
|
| 59 |
+ z[64+2*k ] = -z[64 - k]; |
|
| 60 |
+ z[64+2*k+1] = z[ k + 1]; |
|
| 61 |
+ } |
|
| 62 |
+} |
|
| 63 |
+ |
|
| 64 |
+static void sbr_qmf_post_shuffle_c(float W[32][2], const float *z) |
|
| 65 |
+{
|
|
| 66 |
+ int k; |
|
| 67 |
+ for (k = 0; k < 32; k++) {
|
|
| 68 |
+ W[k][0] = -z[63-k]; |
|
| 69 |
+ W[k][1] = z[k]; |
|
| 70 |
+ } |
|
| 71 |
+} |
|
| 72 |
+ |
|
| 73 |
+static void sbr_qmf_deint_neg_c(float *v, const float *src) |
|
| 74 |
+{
|
|
| 75 |
+ int i; |
|
| 76 |
+ for (i = 0; i < 32; i++) {
|
|
| 77 |
+ v[ i] = src[63 - 2*i ]; |
|
| 78 |
+ v[63 - i] = -src[63 - 2*i - 1]; |
|
| 79 |
+ } |
|
| 80 |
+} |
|
| 81 |
+ |
|
| 82 |
+static void sbr_qmf_deint_bfly_c(float *v, const float *src0, const float *src1) |
|
| 83 |
+{
|
|
| 84 |
+ int i; |
|
| 85 |
+ for (i = 0; i < 64; i++) {
|
|
| 86 |
+ v[ i] = src0[i] - src1[63 - i]; |
|
| 87 |
+ v[127 - i] = src0[i] + src1[63 - i]; |
|
| 88 |
+ } |
|
| 89 |
+} |
|
| 90 |
+ |
|
| 91 |
+static av_always_inline void autocorrelate(const float x[40][2], |
|
| 92 |
+ float phi[3][2][2], int lag) |
|
| 93 |
+{
|
|
| 94 |
+ int i; |
|
| 95 |
+ float real_sum = 0.0f; |
|
| 96 |
+ float imag_sum = 0.0f; |
|
| 97 |
+ if (lag) {
|
|
| 98 |
+ for (i = 1; i < 38; i++) {
|
|
| 99 |
+ real_sum += x[i][0] * x[i+lag][0] + x[i][1] * x[i+lag][1]; |
|
| 100 |
+ imag_sum += x[i][0] * x[i+lag][1] - x[i][1] * x[i+lag][0]; |
|
| 101 |
+ } |
|
| 102 |
+ phi[2-lag][1][0] = real_sum + x[ 0][0] * x[lag][0] + x[ 0][1] * x[lag][1]; |
|
| 103 |
+ phi[2-lag][1][1] = imag_sum + x[ 0][0] * x[lag][1] - x[ 0][1] * x[lag][0]; |
|
| 104 |
+ if (lag == 1) {
|
|
| 105 |
+ phi[0][0][0] = real_sum + x[38][0] * x[39][0] + x[38][1] * x[39][1]; |
|
| 106 |
+ phi[0][0][1] = imag_sum + x[38][0] * x[39][1] - x[38][1] * x[39][0]; |
|
| 107 |
+ } |
|
| 108 |
+ } else {
|
|
| 109 |
+ for (i = 1; i < 38; i++) {
|
|
| 110 |
+ real_sum += x[i][0] * x[i][0] + x[i][1] * x[i][1]; |
|
| 111 |
+ } |
|
| 112 |
+ phi[2][1][0] = real_sum + x[ 0][0] * x[ 0][0] + x[ 0][1] * x[ 0][1]; |
|
| 113 |
+ phi[1][0][0] = real_sum + x[38][0] * x[38][0] + x[38][1] * x[38][1]; |
|
| 114 |
+ } |
|
| 115 |
+} |
|
| 116 |
+ |
|
| 117 |
+static void sbr_autocorrelate_c(const float x[40][2], float phi[3][2][2]) |
|
| 118 |
+{
|
|
| 119 |
+ autocorrelate(x, phi, 0); |
|
| 120 |
+ autocorrelate(x, phi, 1); |
|
| 121 |
+ autocorrelate(x, phi, 2); |
|
| 122 |
+} |
|
| 123 |
+ |
|
| 124 |
+static void sbr_hf_gen_c(float (*X_high)[2], const float (*X_low)[2], |
|
| 125 |
+ const float alpha0[2], const float alpha1[2], |
|
| 126 |
+ float bw, int start, int end) |
|
| 127 |
+{
|
|
| 128 |
+ float alpha[4]; |
|
| 129 |
+ int i; |
|
| 130 |
+ |
|
| 131 |
+ alpha[0] = alpha1[0] * bw * bw; |
|
| 132 |
+ alpha[1] = alpha1[1] * bw * bw; |
|
| 133 |
+ alpha[2] = alpha0[0] * bw; |
|
| 134 |
+ alpha[3] = alpha0[1] * bw; |
|
| 135 |
+ |
|
| 136 |
+ for (i = start; i < end; i++) {
|
|
| 137 |
+ X_high[i][0] = |
|
| 138 |
+ X_low[i - 2][0] * alpha[0] - |
|
| 139 |
+ X_low[i - 2][1] * alpha[1] + |
|
| 140 |
+ X_low[i - 1][0] * alpha[2] - |
|
| 141 |
+ X_low[i - 1][1] * alpha[3] + |
|
| 142 |
+ X_low[i][0]; |
|
| 143 |
+ X_high[i][1] = |
|
| 144 |
+ X_low[i - 2][1] * alpha[0] + |
|
| 145 |
+ X_low[i - 2][0] * alpha[1] + |
|
| 146 |
+ X_low[i - 1][1] * alpha[2] + |
|
| 147 |
+ X_low[i - 1][0] * alpha[3] + |
|
| 148 |
+ X_low[i][1]; |
|
| 149 |
+ } |
|
| 150 |
+} |
|
| 151 |
+ |
|
| 152 |
+static void sbr_hf_g_filt_c(float (*Y)[2], const float (*X_high)[40][2], |
|
| 153 |
+ const float *g_filt, int m_max, int ixh) |
|
| 154 |
+{
|
|
| 155 |
+ int m; |
|
| 156 |
+ |
|
| 157 |
+ for (m = 0; m < m_max; m++) {
|
|
| 158 |
+ Y[m][0] = X_high[m][ixh][0] * g_filt[m]; |
|
| 159 |
+ Y[m][1] = X_high[m][ixh][1] * g_filt[m]; |
|
| 160 |
+ } |
|
| 161 |
+} |
|
| 162 |
+ |
|
| 163 |
+static av_always_inline void sbr_hf_apply_noise(float (*Y)[2], |
|
| 164 |
+ const float *s_m, |
|
| 165 |
+ const float *q_filt, |
|
| 166 |
+ int noise, |
|
| 167 |
+ float phi_sign0, |
|
| 168 |
+ float phi_sign1, |
|
| 169 |
+ int m_max) |
|
| 170 |
+{
|
|
| 171 |
+ int m; |
|
| 172 |
+ |
|
| 173 |
+ for (m = 0; m < m_max; m++) {
|
|
| 174 |
+ float y0 = Y[m][0]; |
|
| 175 |
+ float y1 = Y[m][1]; |
|
| 176 |
+ noise = (noise + 1) & 0x1ff; |
|
| 177 |
+ if (s_m[m]) {
|
|
| 178 |
+ y0 += s_m[m] * phi_sign0; |
|
| 179 |
+ y1 += s_m[m] * phi_sign1; |
|
| 180 |
+ } else {
|
|
| 181 |
+ y0 += q_filt[m] * ff_sbr_noise_table[noise][0]; |
|
| 182 |
+ y1 += q_filt[m] * ff_sbr_noise_table[noise][1]; |
|
| 183 |
+ } |
|
| 184 |
+ Y[m][0] = y0; |
|
| 185 |
+ Y[m][1] = y1; |
|
| 186 |
+ phi_sign1 = -phi_sign1; |
|
| 187 |
+ } |
|
| 188 |
+} |
|
| 189 |
+ |
|
| 190 |
+static void sbr_hf_apply_noise_0(float (*Y)[2], const float *s_m, |
|
| 191 |
+ const float *q_filt, int noise, |
|
| 192 |
+ int kx, int m_max) |
|
| 193 |
+{
|
|
| 194 |
+ sbr_hf_apply_noise(Y, s_m, q_filt, noise, 1.0, 0.0, m_max); |
|
| 195 |
+} |
|
| 196 |
+ |
|
| 197 |
+static void sbr_hf_apply_noise_1(float (*Y)[2], const float *s_m, |
|
| 198 |
+ const float *q_filt, int noise, |
|
| 199 |
+ int kx, int m_max) |
|
| 200 |
+{
|
|
| 201 |
+ float phi_sign = 1 - 2 * (kx & 1); |
|
| 202 |
+ sbr_hf_apply_noise(Y, s_m, q_filt, noise, 0.0, phi_sign, m_max); |
|
| 203 |
+} |
|
| 204 |
+ |
|
| 205 |
+static void sbr_hf_apply_noise_2(float (*Y)[2], const float *s_m, |
|
| 206 |
+ const float *q_filt, int noise, |
|
| 207 |
+ int kx, int m_max) |
|
| 208 |
+{
|
|
| 209 |
+ sbr_hf_apply_noise(Y, s_m, q_filt, noise, -1.0, 0.0, m_max); |
|
| 210 |
+} |
|
| 211 |
+ |
|
| 212 |
+static void sbr_hf_apply_noise_3(float (*Y)[2], const float *s_m, |
|
| 213 |
+ const float *q_filt, int noise, |
|
| 214 |
+ int kx, int m_max) |
|
| 215 |
+{
|
|
| 216 |
+ float phi_sign = 1 - 2 * (kx & 1); |
|
| 217 |
+ sbr_hf_apply_noise(Y, s_m, q_filt, noise, 0.0, -phi_sign, m_max); |
|
| 218 |
+} |
|
| 219 |
+ |
|
| 220 |
+av_cold void ff_sbrdsp_init(SBRDSPContext *s) |
|
| 221 |
+{
|
|
| 222 |
+ s->sum64x5 = sbr_sum64x5_c; |
|
| 223 |
+ s->sum_square = sbr_sum_square_c; |
|
| 224 |
+ s->neg_odd_64 = sbr_neg_odd_64_c; |
|
| 225 |
+ s->qmf_pre_shuffle = sbr_qmf_pre_shuffle_c; |
|
| 226 |
+ s->qmf_post_shuffle = sbr_qmf_post_shuffle_c; |
|
| 227 |
+ s->qmf_deint_neg = sbr_qmf_deint_neg_c; |
|
| 228 |
+ s->qmf_deint_bfly = sbr_qmf_deint_bfly_c; |
|
| 229 |
+ s->autocorrelate = sbr_autocorrelate_c; |
|
| 230 |
+ s->hf_gen = sbr_hf_gen_c; |
|
| 231 |
+ s->hf_g_filt = sbr_hf_g_filt_c; |
|
| 232 |
+ |
|
| 233 |
+ s->hf_apply_noise[0] = sbr_hf_apply_noise_0; |
|
| 234 |
+ s->hf_apply_noise[1] = sbr_hf_apply_noise_1; |
|
| 235 |
+ s->hf_apply_noise[2] = sbr_hf_apply_noise_2; |
|
| 236 |
+ s->hf_apply_noise[3] = sbr_hf_apply_noise_3; |
|
| 237 |
+ |
|
| 238 |
+ if (ARCH_ARM) |
|
| 239 |
+ ff_sbrdsp_init_arm(s); |
|
| 240 |
+} |
| 0 | 241 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,48 @@ |
| 0 |
+/* |
|
| 1 |
+ * Copyright (c) 2012 Mans Rullgard |
|
| 2 |
+ * |
|
| 3 |
+ * This file is part of Libav. |
|
| 4 |
+ * |
|
| 5 |
+ * Libav is free software; you can redistribute it and/or |
|
| 6 |
+ * modify it under the terms of the GNU Lesser General Public |
|
| 7 |
+ * License as published by the Free Software Foundation; either |
|
| 8 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
| 9 |
+ * |
|
| 10 |
+ * Libav is distributed in the hope that it will be useful, |
|
| 11 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
| 12 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
| 13 |
+ * Lesser General Public License for more details. |
|
| 14 |
+ * |
|
| 15 |
+ * You should have received a copy of the GNU Lesser General Public |
|
| 16 |
+ * License along with Libav; if not, write to the Free Software |
|
| 17 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
| 18 |
+ */ |
|
| 19 |
+ |
|
| 20 |
+#ifndef LIBAVCODEC_SBRDSP_H |
|
| 21 |
+#define LIBAVCODEC_SBRDSP_H |
|
| 22 |
+ |
|
| 23 |
+typedef struct SBRDSPContext {
|
|
| 24 |
+ void (*sum64x5)(float *z); |
|
| 25 |
+ float (*sum_square)(float (*x)[2], int n); |
|
| 26 |
+ void (*neg_odd_64)(float *x); |
|
| 27 |
+ void (*qmf_pre_shuffle)(float *z); |
|
| 28 |
+ void (*qmf_post_shuffle)(float W[32][2], const float *z); |
|
| 29 |
+ void (*qmf_deint_neg)(float *v, const float *src); |
|
| 30 |
+ void (*qmf_deint_bfly)(float *v, const float *src0, const float *src1); |
|
| 31 |
+ void (*autocorrelate)(const float x[40][2], float phi[3][2][2]); |
|
| 32 |
+ void (*hf_gen)(float (*X_high)[2], const float (*X_low)[2], |
|
| 33 |
+ const float alpha0[2], const float alpha1[2], |
|
| 34 |
+ float bw, int start, int end); |
|
| 35 |
+ void (*hf_g_filt)(float (*Y)[2], const float (*X_high)[40][2], |
|
| 36 |
+ const float *g_filt, int m_max, int ixh); |
|
| 37 |
+ void (*hf_apply_noise[4])(float (*Y)[2], const float *s_m, |
|
| 38 |
+ const float *q_filt, int noise, |
|
| 39 |
+ int kx, int m_max); |
|
| 40 |
+} SBRDSPContext; |
|
| 41 |
+ |
|
| 42 |
+extern const float ff_sbr_noise_table[][2]; |
|
| 43 |
+ |
|
| 44 |
+void ff_sbrdsp_init(SBRDSPContext *s); |
|
| 45 |
+void ff_sbrdsp_init_arm(SBRDSPContext *s); |
|
| 46 |
+ |
|
| 47 |
+#endif |
| ... | ... |
@@ -242,7 +242,7 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height, |
| 242 | 242 |
*height+=2; // some of the optimized chroma MC reads one line too much |
| 243 | 243 |
// which is also done in mpeg decoders with lowres > 0 |
| 244 | 244 |
|
| 245 |
- for (i = 0; i < AV_NUM_DATA_POINTERS; i++) |
|
| 245 |
+ for (i = 0; i < 4; i++) |
|
| 246 | 246 |
linesize_align[i] = STRIDE_ALIGN; |
| 247 | 247 |
//STRIDE_ALIGN is 8 for SSE* but this does not work for SVQ1 chroma planes |
| 248 | 248 |
//we could change STRIDE_ALIGN to 16 for x86/sse but it would increase the |
| ... | ... |
@@ -252,7 +252,7 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height, |
| 252 | 252 |
if(s->codec_id == CODEC_ID_SVQ1 || s->codec_id == CODEC_ID_VP5 || |
| 253 | 253 |
s->codec_id == CODEC_ID_VP6 || s->codec_id == CODEC_ID_VP6F || |
| 254 | 254 |
s->codec_id == CODEC_ID_VP6A || s->codec_id == CODEC_ID_DIRAC) {
|
| 255 |
- for (i = 0; i < AV_NUM_DATA_POINTERS; i++) |
|
| 255 |
+ for (i = 0; i < 4; i++) |
|
| 256 | 256 |
linesize_align[i] = 16; |
| 257 | 257 |
} |
| 258 | 258 |
#endif |