IDCTs are partially evaluated according to IEEE 1180-1990 (more or
less). An override is added to the table for implementations known
not to meet the spec requirements. These variants are still run, but
their results are not checked for accuracy.
Signed-off-by: Mans Rullgard <mans@mansr.com>
... | ... |
@@ -74,6 +74,7 @@ struct algo { |
74 | 74 |
enum formattag { NO_PERM, MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, |
75 | 75 |
SSE2_PERM, PARTTRANS_PERM } format; |
76 | 76 |
int mm_support; |
77 |
+ int nonspec; |
|
77 | 78 |
}; |
78 | 79 |
|
79 | 80 |
#ifndef FAAN_POSTSCALE |
... | ... |
@@ -115,13 +116,13 @@ static const struct algo idct_tab[] = { |
115 | 115 |
|
116 | 116 |
#if HAVE_MMX |
117 | 117 |
#if CONFIG_GPL |
118 |
- {"LIBMPEG2-MMX", ff_mmx_idct, ff_ref_idct, MMX_PERM, AV_CPU_FLAG_MMX}, |
|
119 |
- {"LIBMPEG2-MMX2", ff_mmxext_idct, ff_ref_idct, MMX_PERM, AV_CPU_FLAG_MMX2}, |
|
118 |
+ {"LIBMPEG2-MMX", ff_mmx_idct, ff_ref_idct, MMX_PERM, AV_CPU_FLAG_MMX, 1}, |
|
119 |
+ {"LIBMPEG2-MMX2", ff_mmxext_idct, ff_ref_idct, MMX_PERM, AV_CPU_FLAG_MMX2, 1}, |
|
120 | 120 |
#endif |
121 | 121 |
{"SIMPLE-MMX", ff_simple_idct_mmx, ff_ref_idct, MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX}, |
122 |
- {"XVID-MMX", ff_idct_xvid_mmx, ff_ref_idct, NO_PERM, AV_CPU_FLAG_MMX}, |
|
123 |
- {"XVID-MMX2", ff_idct_xvid_mmx2, ff_ref_idct, NO_PERM, AV_CPU_FLAG_MMX2}, |
|
124 |
- {"XVID-SSE2", ff_idct_xvid_sse2, ff_ref_idct, SSE2_PERM, AV_CPU_FLAG_SSE2}, |
|
122 |
+ {"XVID-MMX", ff_idct_xvid_mmx, ff_ref_idct, NO_PERM, AV_CPU_FLAG_MMX, 1}, |
|
123 |
+ {"XVID-MMX2", ff_idct_xvid_mmx2, ff_ref_idct, NO_PERM, AV_CPU_FLAG_MMX2, 1}, |
|
124 |
+ {"XVID-SSE2", ff_idct_xvid_sse2, ff_ref_idct, SSE2_PERM, AV_CPU_FLAG_SSE2, 1}, |
|
125 | 125 |
#endif |
126 | 126 |
|
127 | 127 |
#if ARCH_BFIN |
... | ... |
@@ -200,15 +201,17 @@ static inline void mmx_emms(void) |
200 | 200 |
#endif |
201 | 201 |
} |
202 | 202 |
|
203 |
-static void dct_error(const struct algo *dct, int test, int is_idct, int speed) |
|
203 |
+static int dct_error(const struct algo *dct, int test, int is_idct, int speed) |
|
204 | 204 |
{ |
205 | 205 |
int it, i, scale; |
206 | 206 |
int err_inf, v; |
207 |
- int64_t err2, ti, ti1, it1; |
|
207 |
+ int64_t err2, ti, ti1, it1, err_sum = 0; |
|
208 | 208 |
int64_t sysErr[64], sysErrMax = 0; |
209 | 209 |
int maxout = 0; |
210 | 210 |
int blockSumErrMax = 0, blockSumErr; |
211 | 211 |
AVLFG prng; |
212 |
+ double omse, ome; |
|
213 |
+ int spec_err; |
|
212 | 214 |
|
213 | 215 |
av_lfg_init(&prng, 1); |
214 | 216 |
|
... | ... |
@@ -276,7 +279,9 @@ static void dct_error(const struct algo *dct, int test, int is_idct, int speed) |
276 | 276 |
|
277 | 277 |
blockSumErr = 0; |
278 | 278 |
for (i = 0; i < 64; i++) { |
279 |
- v = abs(block[i] - block1[i]); |
|
279 |
+ int err = block[i] - block1[i]; |
|
280 |
+ err_sum += err; |
|
281 |
+ v = abs(err); |
|
280 | 282 |
if (v > err_inf) |
281 | 283 |
err_inf = v; |
282 | 284 |
err2 += v * v; |
... | ... |
@@ -298,13 +303,21 @@ static void dct_error(const struct algo *dct, int test, int is_idct, int speed) |
298 | 298 |
} |
299 | 299 |
printf("\n"); |
300 | 300 |
|
301 |
- printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n", |
|
301 |
+ omse = (double) err2 / NB_ITS / 64; |
|
302 |
+ ome = (double) err_sum / NB_ITS / 64; |
|
303 |
+ |
|
304 |
+ spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015); |
|
305 |
+ |
|
306 |
+ printf("%s %s: ppe=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n", |
|
302 | 307 |
is_idct ? "IDCT" : "DCT", dct->name, err_inf, |
303 |
- (double) err2 / NB_ITS / 64.0, (double) sysErrMax / NB_ITS, |
|
308 |
+ omse, ome, (double) sysErrMax / NB_ITS, |
|
304 | 309 |
maxout, blockSumErrMax); |
305 | 310 |
|
311 |
+ if (spec_err && !dct->nonspec) |
|
312 |
+ return 1; |
|
313 |
+ |
|
306 | 314 |
if (!speed) |
307 |
- return; |
|
315 |
+ return 0; |
|
308 | 316 |
|
309 | 317 |
/* speed test */ |
310 | 318 |
for (i = 0; i < 64; i++) |
... | ... |
@@ -355,6 +368,8 @@ static void dct_error(const struct algo *dct, int test, int is_idct, int speed) |
355 | 355 |
|
356 | 356 |
printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name, |
357 | 357 |
(double) it1 * 1000.0 / (double) ti1); |
358 |
+ |
|
359 |
+ return 0; |
|
358 | 360 |
} |
359 | 361 |
|
360 | 362 |
DECLARE_ALIGNED(8, static uint8_t, img_dest)[64]; |
... | ... |
@@ -514,6 +529,7 @@ int main(int argc, char **argv) |
514 | 514 |
int c, i; |
515 | 515 |
int test = 1; |
516 | 516 |
int speed = 0; |
517 |
+ int err = 0; |
|
517 | 518 |
|
518 | 519 |
cpu_flags = av_get_cpu_flags(); |
519 | 520 |
|
... | ... |
@@ -559,8 +575,9 @@ int main(int argc, char **argv) |
559 | 559 |
const struct algo *algos = test_idct ? idct_tab : fdct_tab; |
560 | 560 |
for (i = 0; algos[i].name; i++) |
561 | 561 |
if (!(~cpu_flags & algos[i].mm_support)) { |
562 |
- dct_error(&algos[i], test, test_idct, speed); |
|
562 |
+ err |= dct_error(&algos[i], test, test_idct, speed); |
|
563 | 563 |
} |
564 | 564 |
} |
565 |
- return 0; |
|
565 |
+ |
|
566 |
+ return err; |
|
566 | 567 |
} |