* qatar/master:
proresdsp: port x86 assembly to cpuflags.
lavr: x86: improve non-SSE4 version of S16_TO_S32_SX macro
lavfi: better channel layout negotiation
alac: check for truncated packets
alac: reverse lpc coeff order, simplify filter
lavr: add x86-optimized mixing functions
x86: add support for fmaddps fma4 instruction with abstraction to avx/sse
tscc2: fix typo in array index
build: use COMPILE template for HOSTOBJS
build: do full flag handling for all compiler-type tools
eval: fix printing of NaN in eval fate test.
build: Rename aandct component to more descriptive aandcttables
mpegaudio: bury inline asm under HAVE_INLINE_ASM.
x86inc: automatically insert vzeroupper for YMM functions.
rtmp: Check the buffer length of ping packets
rtmp: Allow having more unknown data at the end of a chunk size packet without failing
rtmp: Prevent reading outside of an allocated buffer when receiving server bandwidth packets
Conflicts:
Makefile
configure
libavcodec/x86/proresdsp.asm
libavutil/eval.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
... | ... |
@@ -11,7 +11,7 @@ ifndef V |
11 | 11 |
Q = @ |
12 | 12 |
ECHO = printf "$(1)\t%s\n" $(2) |
13 | 13 |
BRIEF = CC CXX AS YASM AR LD HOSTCC STRIP CP |
14 |
-SILENT = DEPCC YASMDEP RM RANLIB |
|
14 |
+SILENT = DEPCC DEPAS DEPHOSTCC YASMDEP RM RANLIB |
|
15 | 15 |
MSG = $@ |
16 | 16 |
M = @$(call ECHO,$(TAG),$@); |
17 | 17 |
$(foreach VAR,$(BRIEF), \ |
... | ... |
@@ -26,15 +26,16 @@ ALLFFLIBS = avcodec avdevice avfilter avformat avresample avutil postproc swscal |
26 | 26 |
IFLAGS := -I. -I$(SRC_PATH)/ |
27 | 27 |
CPPFLAGS := $(IFLAGS) $(CPPFLAGS) |
28 | 28 |
CFLAGS += $(ECFLAGS) |
29 |
-CCFLAGS = $(CFLAGS) |
|
29 |
+CCFLAGS = $(CPPFLAGS) $(CFLAGS) |
|
30 |
+ASFLAGS := $(CPPFLAGS) $(ASFLAGS) |
|
30 | 31 |
CXXFLAGS := $(CFLAGS) $(CXXFLAGS) |
31 | 32 |
YASMFLAGS += $(IFLAGS) -I$(SRC_PATH)/libavutil/x86/ -Pconfig.asm |
32 |
-HOSTCFLAGS += $(IFLAGS) |
|
33 |
+HOSTCCFLAGS = $(IFLAGS) $(HOSTCFLAGS) |
|
33 | 34 |
LDFLAGS := $(ALLFFLIBS:%=-Llib%) $(LDFLAGS) |
34 | 35 |
|
35 | 36 |
define COMPILE |
36 |
- $($(1)DEP) |
|
37 |
- $($(1)) $(CPPFLAGS) $($(1)FLAGS) $($(1)_DEPFLAGS) -c $($(1)_O) $< |
|
37 |
+ $(call $(1)DEP,$(1)) |
|
38 |
+ $($(1)) $($(1)FLAGS) $($(1)_DEPFLAGS) -c $($(1)_O) $< |
|
38 | 39 |
endef |
39 | 40 |
|
40 | 41 |
COMPILE_C = $(call COMPILE,CC) |
... | ... |
@@ -101,7 +102,7 @@ checkheaders: $(filter-out $(SKIPHEADERS:.h=.ho),$(ALLHEADERS:.h=.ho)) |
101 | 101 |
alltools: $(TOOLS) |
102 | 102 |
|
103 | 103 |
$(HOSTOBJS): %.o: %.c |
104 |
- $(HOSTCC) $(HOSTCFLAGS) -c -o $@ $< |
|
104 |
+ $(call COMPILE,HOSTCC) |
|
105 | 105 |
|
106 | 106 |
$(HOSTPROGS): %$(HOSTEXESUF): %.o |
107 | 107 |
$(HOSTCC) $(HOSTLDFLAGS) -o $@ $< $(HOSTLIBS) |
... | ... |
@@ -117,4 +118,4 @@ CLEANSUFFIXES = *.d *.o *~ *.ho *.map *.ver *.gcno *.gcda |
117 | 117 |
DISTCLEANSUFFIXES = *.pc |
118 | 118 |
LIBSUFFIXES = *.a *.lib *.so *.so.* *.dylib *.dll *.def *.dll.a |
119 | 119 |
|
120 |
--include $(wildcard $(OBJS:.o=.d) $(TESTOBJS:.o=.d)) |
|
120 |
+-include $(wildcard $(OBJS:.o=.d) $(HOSTOBJS:.o=.d) $(TESTOBJS:.o=.d)) |
... | ... |
@@ -265,6 +265,7 @@ Optimization options (experts only): |
265 | 265 |
--disable-sse disable SSE optimizations |
266 | 266 |
--disable-ssse3 disable SSSE3 optimizations |
267 | 267 |
--disable-avx disable AVX optimizations |
268 |
+ --disable-fma4 disable FMA4 optimizations |
|
268 | 269 |
--disable-armv5te disable armv5te optimizations |
269 | 270 |
--disable-armv6 disable armv6 optimizations |
270 | 271 |
--disable-armv6t2 disable armv6t2 optimizations |
... | ... |
@@ -1173,6 +1174,7 @@ ARCH_EXT_LIST=' |
1173 | 1173 |
armv6t2 |
1174 | 1174 |
armvfp |
1175 | 1175 |
avx |
1176 |
+ fma4 |
|
1176 | 1177 |
mmi |
1177 | 1178 |
mmx |
1178 | 1179 |
mmx2 |
... | ... |
@@ -1336,7 +1338,7 @@ HAVE_LIST=" |
1336 | 1336 |
|
1337 | 1337 |
# options emitted with CONFIG_ prefix but not available on command line |
1338 | 1338 |
CONFIG_EXTRA=" |
1339 |
- aandct |
|
1339 |
+ aandcttables |
|
1340 | 1340 |
avutil |
1341 | 1341 |
golomb |
1342 | 1342 |
gplv3 |
... | ... |
@@ -1450,6 +1452,7 @@ mmx2_deps="mmx" |
1450 | 1450 |
sse_deps="mmx" |
1451 | 1451 |
ssse3_deps="sse" |
1452 | 1452 |
avx_deps="ssse3" |
1453 |
+fma4_deps="avx" |
|
1453 | 1454 |
|
1454 | 1455 |
aligned_stack_if_any="ppc x86" |
1455 | 1456 |
fast_64bit_if_any="alpha ia64 mips64 parisc64 ppc64 sparc64 x86_64" |
... | ... |
@@ -1477,7 +1480,7 @@ ac3_fixed_encoder_select="mdct ac3dsp" |
1477 | 1477 |
alac_encoder_select="lpc" |
1478 | 1478 |
amrnb_decoder_select="lsp" |
1479 | 1479 |
amrwb_decoder_select="lsp" |
1480 |
-amv_encoder_select="aandct" |
|
1480 |
+amv_encoder_select="aandcttables" |
|
1481 | 1481 |
atrac1_decoder_select="mdct sinewin" |
1482 | 1482 |
atrac3_decoder_select="mdct" |
1483 | 1483 |
binkaudio_dct_decoder_select="mdct rdft dct sinewin" |
... | ... |
@@ -1487,13 +1490,13 @@ cook_decoder_select="mdct sinewin" |
1487 | 1487 |
cscd_decoder_suggest="zlib" |
1488 | 1488 |
dca_decoder_select="mdct" |
1489 | 1489 |
dirac_decoder_select="dwt golomb" |
1490 |
-dnxhd_encoder_select="aandct" |
|
1490 |
+dnxhd_encoder_select="aandcttables" |
|
1491 | 1491 |
dxa_decoder_select="zlib" |
1492 | 1492 |
eac3_decoder_select="ac3_decoder" |
1493 | 1493 |
eac3_encoder_select="mdct ac3dsp" |
1494 |
-eamad_decoder_select="aandct" |
|
1495 |
-eatgq_decoder_select="aandct" |
|
1496 |
-eatqi_decoder_select="aandct" |
|
1494 |
+eamad_decoder_select="aandcttables" |
|
1495 |
+eatgq_decoder_select="aandcttables" |
|
1496 |
+eatqi_decoder_select="aandcttables" |
|
1497 | 1497 |
exr_decoder_select="zlib" |
1498 | 1498 |
ffv1_decoder_select="golomb" |
1499 | 1499 |
flac_decoder_select="golomb" |
... | ... |
@@ -1505,9 +1508,9 @@ flashsv2_decoder_select="zlib" |
1505 | 1505 |
flv_decoder_select="h263_decoder" |
1506 | 1506 |
flv_encoder_select="h263_encoder" |
1507 | 1507 |
fraps_decoder_select="huffman" |
1508 |
-h261_encoder_select="aandct" |
|
1508 |
+h261_encoder_select="aandcttables" |
|
1509 | 1509 |
h263_decoder_select="h263_parser" |
1510 |
-h263_encoder_select="aandct" |
|
1510 |
+h263_encoder_select="aandcttables" |
|
1511 | 1511 |
h263_vaapi_hwaccel_select="vaapi h263_decoder" |
1512 | 1512 |
h263i_decoder_select="h263_decoder" |
1513 | 1513 |
h263p_encoder_select="h263_encoder" |
... | ... |
@@ -1523,9 +1526,9 @@ iac_decoder_select="fft mdct sinewin" |
1523 | 1523 |
imc_decoder_select="fft mdct sinewin" |
1524 | 1524 |
jpegls_decoder_select="golomb" |
1525 | 1525 |
jpegls_encoder_select="golomb" |
1526 |
-ljpeg_encoder_select="aandct" |
|
1526 |
+ljpeg_encoder_select="aandcttables" |
|
1527 | 1527 |
loco_decoder_select="golomb" |
1528 |
-mjpeg_encoder_select="aandct" |
|
1528 |
+mjpeg_encoder_select="aandcttables" |
|
1529 | 1529 |
mlp_decoder_select="mlp_parser" |
1530 | 1530 |
mp1_decoder_select="mpegaudiodsp" |
1531 | 1531 |
mp1float_decoder_select="mpegaudiodsp" |
... | ... |
@@ -1544,13 +1547,13 @@ mpeg_xvmc_decoder_deps="X11_extensions_XvMClib_h" |
1544 | 1544 |
mpeg_xvmc_decoder_select="mpegvideo_decoder" |
1545 | 1545 |
mpeg1_vdpau_decoder_select="vdpau mpeg1video_decoder" |
1546 | 1546 |
mpeg1_vdpau_hwaccel_select="vdpau mpeg1video_decoder" |
1547 |
-mpeg1video_encoder_select="aandct" |
|
1547 |
+mpeg1video_encoder_select="aandcttables" |
|
1548 | 1548 |
mpeg2_crystalhd_decoder_select="crystalhd" |
1549 | 1549 |
mpeg2_dxva2_hwaccel_deps="dxva2api_h" |
1550 | 1550 |
mpeg2_dxva2_hwaccel_select="dxva2 mpeg2video_decoder" |
1551 | 1551 |
mpeg2_vdpau_hwaccel_select="vdpau mpeg2video_decoder" |
1552 | 1552 |
mpeg2_vaapi_hwaccel_select="vaapi mpeg2video_decoder" |
1553 |
-mpeg2video_encoder_select="aandct" |
|
1553 |
+mpeg2video_encoder_select="aandcttables" |
|
1554 | 1554 |
mpeg4_crystalhd_decoder_select="crystalhd" |
1555 | 1555 |
mpeg4_decoder_select="h263_decoder mpeg4video_parser" |
1556 | 1556 |
mpeg4_encoder_select="h263_encoder" |
... | ... |
@@ -1580,11 +1583,11 @@ rv40_decoder_select="golomb h264chroma h264pred h264qpel" |
1580 | 1580 |
shorten_decoder_select="golomb" |
1581 | 1581 |
sipr_decoder_select="lsp" |
1582 | 1582 |
snow_decoder_select="dwt" |
1583 |
-snow_encoder_select="aandct dwt" |
|
1583 |
+snow_encoder_select="aandcttables dwt" |
|
1584 | 1584 |
sonic_decoder_select="golomb" |
1585 | 1585 |
sonic_encoder_select="golomb" |
1586 | 1586 |
sonic_ls_encoder_select="golomb" |
1587 |
-svq1_encoder_select="aandct" |
|
1587 |
+svq1_encoder_select="aandcttables" |
|
1588 | 1588 |
svq3_decoder_select="golomb h264chroma h264dsp h264pred h264qpel" |
1589 | 1589 |
svq3_decoder_suggest="zlib" |
1590 | 1590 |
theora_decoder_select="vp3_decoder" |
... | ... |
@@ -1965,6 +1968,8 @@ ldflags_filter=echo |
1965 | 1965 |
AS_O='-o $@' |
1966 | 1966 |
CC_O='-o $@' |
1967 | 1967 |
CXX_O='-o $@' |
1968 |
+LD_O='-o $@' |
|
1969 |
+HOSTCC_O='-o $@' |
|
1968 | 1970 |
|
1969 | 1971 |
host_cflags='-D_ISOC99_SOURCE -D_XOPEN_SOURCE=600 -O3 -g' |
1970 | 1972 |
host_libs='-lm' |
... | ... |
@@ -1975,8 +1980,8 @@ target_path='$(CURDIR)' |
1975 | 1975 |
|
1976 | 1976 |
# since the object filename is not given with the -MM flag, the compiler |
1977 | 1977 |
# is only able to print the basename, and we must add the path ourselves |
1978 |
-DEPEND_CMD='$(DEPCC) $(DEPFLAGS) $< | sed -e "/^\#.*/d" -e "s,^[[:space:]]*$(*F)\\.o,$(@D)/$(*F).o," > $(@:.o=.d)' |
|
1979 |
-DEPFLAGS='$(CPPFLAGS) $(CFLAGS) -MM' |
|
1978 |
+DEPCMD='$(DEP$(1)) $(DEP$(1)FLAGS) $($(1)DEP_FLAGS) $< | sed -e "/^\#.*/d" -e "s,^[[:space:]]*$(*F)\\.o,$(@D)/$(*F).o," > $(@:.o=.d)' |
|
1979 |
+DEPFLAGS='-MM' |
|
1980 | 1980 |
|
1981 | 1981 |
# find source path |
1982 | 1982 |
if test -f configure; then |
... | ... |
@@ -2319,120 +2324,150 @@ tms470_flags(){ |
2319 | 2319 |
done |
2320 | 2320 |
} |
2321 | 2321 |
|
2322 |
-if $cc -v 2>&1 | grep -q '^gcc.*LLVM'; then |
|
2323 |
- cc_type=llvm_gcc |
|
2324 |
- gcc_extra_ver=$(expr "$($cc --version | head -n1)" : '.*\((.*)\)') |
|
2325 |
- cc_ident="llvm-gcc $($cc -dumpversion) $gcc_extra_ver" |
|
2326 |
- CC_DEPFLAGS='-MMD -MF $(@:.o=.d) -MT $@' |
|
2327 |
- AS_DEPFLAGS='-MMD -MF $(@:.o=.d) -MT $@' |
|
2328 |
- cflags_speed='-O3' |
|
2329 |
- cflags_size='-Os' |
|
2330 |
-elif $cc -v 2>&1 | grep -qi ^gcc; then |
|
2331 |
- cc_type=gcc |
|
2332 |
- gcc_version=$($cc --version | head -n1) |
|
2333 |
- gcc_basever=$($cc -dumpversion) |
|
2334 |
- gcc_pkg_ver=$(expr "$gcc_version" : '[^ ]* \(([^)]*)\)') |
|
2335 |
- gcc_ext_ver=$(expr "$gcc_version" : ".*$gcc_pkg_ver $gcc_basever \\(.*\\)") |
|
2336 |
- cc_ident=$(cleanws "gcc $gcc_basever $gcc_pkg_ver $gcc_ext_ver") |
|
2337 |
- if ! $cc -dumpversion | grep -q '^2\.'; then |
|
2338 |
- CC_DEPFLAGS='-MMD -MF $(@:.o=.d) -MT $@' |
|
2339 |
- AS_DEPFLAGS='-MMD -MF $(@:.o=.d) -MT $@' |
|
2322 |
+probe_cc(){ |
|
2323 |
+ pfx=$1 |
|
2324 |
+ _cc=$2 |
|
2325 |
+ |
|
2326 |
+ unset _type _ident _cc_o _flags _cflags _ldflags _depflags _DEPCMD _DEPFLAGS |
|
2327 |
+ _flags_filter=echo |
|
2328 |
+ |
|
2329 |
+ if $_cc -v 2>&1 | grep -q '^gcc.*LLVM'; then |
|
2330 |
+ _type=llvm_gcc |
|
2331 |
+ gcc_extra_ver=$(expr "$($_cc --version | head -n1)" : '.*\((.*)\)') |
|
2332 |
+ _ident="llvm-gcc $($_cc -dumpversion) $gcc_extra_ver" |
|
2333 |
+ _depflags='-MMD -MF $(@:.o=.d) -MT $@' |
|
2334 |
+ _cflags_speed='-O3' |
|
2335 |
+ _cflags_size='-Os' |
|
2336 |
+ elif $_cc -v 2>&1 | grep -qi ^gcc; then |
|
2337 |
+ _type=gcc |
|
2338 |
+ gcc_version=$($_cc --version | head -n1) |
|
2339 |
+ gcc_basever=$($_cc -dumpversion) |
|
2340 |
+ gcc_pkg_ver=$(expr "$gcc_version" : '[^ ]* \(([^)]*)\)') |
|
2341 |
+ gcc_ext_ver=$(expr "$gcc_version" : ".*$gcc_pkg_ver $gcc_basever \\(.*\\)") |
|
2342 |
+ _ident=$(cleanws "gcc $gcc_basever $gcc_pkg_ver $gcc_ext_ver") |
|
2343 |
+ if ! $_cc -dumpversion | grep -q '^2\.'; then |
|
2344 |
+ _depflags='-MMD -MF $(@:.o=.d) -MT $@' |
|
2345 |
+ fi |
|
2346 |
+ _cflags_speed='-O3' |
|
2347 |
+ _cflags_size='-Os' |
|
2348 |
+ elif $_cc --version 2>/dev/null | grep -q Intel; then |
|
2349 |
+ _type=icc |
|
2350 |
+ _ident=$($_cc --version | head -n1) |
|
2351 |
+ _depflags='-MMD' |
|
2352 |
+ _cflags_speed='-O3' |
|
2353 |
+ _cflags_size='-Os' |
|
2354 |
+ _cflags_noopt='-O1' |
|
2355 |
+ elif $_cc -v 2>&1 | grep -q xlc; then |
|
2356 |
+ _type=xlc |
|
2357 |
+ _ident=$($_cc -qversion 2>/dev/null | head -n1) |
|
2358 |
+ _cflags_speed='-O5' |
|
2359 |
+ _cflags_size='-O5 -qcompact' |
|
2360 |
+ elif $_cc -V 2>/dev/null | grep -q Compaq; then |
|
2361 |
+ _type=ccc |
|
2362 |
+ _ident=$($_cc -V | head -n1 | cut -d' ' -f1-3) |
|
2363 |
+ _DEPFLAGS='-M' |
|
2364 |
+ debuglevel=3 |
|
2365 |
+ _ldflags='-Wl,-z,now' # calls to libots crash without this |
|
2366 |
+ _cflags_speed='-fast' |
|
2367 |
+ _cflags_size='-O1' |
|
2368 |
+ elif $_cc --vsn 2>/dev/null | grep -q "ARM C/C++ Compiler"; then |
|
2369 |
+ test -d "$sysroot" || die "No valid sysroot specified." |
|
2370 |
+ _type=armcc |
|
2371 |
+ _ident=$($_cc --vsn | head -n1) |
|
2372 |
+ armcc_conf="$PWD/armcc.conf" |
|
2373 |
+ $_cc --arm_linux_configure \ |
|
2374 |
+ --arm_linux_config_file="$armcc_conf" \ |
|
2375 |
+ --configure_sysroot="$sysroot" \ |
|
2376 |
+ --configure_cpp_headers="$sysinclude" >>$logfile 2>&1 || |
|
2377 |
+ die "Error creating armcc configuration file." |
|
2378 |
+ $_cc --vsn | grep -q RVCT && armcc_opt=rvct || armcc_opt=armcc |
|
2379 |
+ _flags="--arm_linux_config_file=$armcc_conf --translate_gcc" |
|
2380 |
+ as_default="${cross_prefix}gcc" |
|
2381 |
+ _depflags='-MMD' |
|
2382 |
+ _cflags_speed='-O3' |
|
2383 |
+ _cflags_size='-Os' |
|
2384 |
+ elif $_cc -version 2>/dev/null | grep -q TMS470; then |
|
2385 |
+ _type=tms470 |
|
2386 |
+ _ident=$($_cc -version | head -n1 | tr -s ' ') |
|
2387 |
+ _flags='--gcc --abi=eabi -me' |
|
2388 |
+ _cflags='-D__gnuc_va_list=va_list -D__USER_LABEL_PREFIX__=' |
|
2389 |
+ _cc_o='-fe=$@' |
|
2390 |
+ as_default="${cross_prefix}gcc" |
|
2391 |
+ ld_default="${cross_prefix}gcc" |
|
2392 |
+ _depflags='-ppa -ppd=$(@:.o=.d)' |
|
2393 |
+ _cflags_speed='-O3 -mf=5' |
|
2394 |
+ _cflags_size='-O3 -mf=2' |
|
2395 |
+ _flags_filter=tms470_flags |
|
2396 |
+ elif $_cc -v 2>&1 | grep -q clang; then |
|
2397 |
+ _type=clang |
|
2398 |
+ _ident=$($_cc --version | head -n1) |
|
2399 |
+ _depflags='-MMD' |
|
2400 |
+ _cflags_speed='-O3' |
|
2401 |
+ _cflags_size='-Os' |
|
2402 |
+ elif $_cc -V 2>&1 | grep -q Sun; then |
|
2403 |
+ _type=suncc |
|
2404 |
+ _ident=$($_cc -V 2>&1 | head -n1 | cut -d' ' -f 2-) |
|
2405 |
+ _DEPCMD='$(DEP$(1)) $(DEP$(1)FLAGS) $($(1)DEP_FLAGS) $< | sed -e "1s,^.*: ,$@: ," -e "\$$!s,\$$, \\\," -e "1!s,^.*: , ," > $(@:.o=.d)' |
|
2406 |
+ _DEPFLAGS='-xM1' |
|
2407 |
+ _ldflags='-std=c99' |
|
2408 |
+ _cflags_speed='-O5' |
|
2409 |
+ _cflags_size='-O5 -xspace' |
|
2410 |
+ _flags_filter=suncc_flags |
|
2411 |
+ elif $_cc -v 2>&1 | grep -q 'PathScale\|Path64'; then |
|
2412 |
+ _type=pathscale |
|
2413 |
+ _ident=$($_cc -v 2>&1 | head -n1 | tr -d :) |
|
2414 |
+ _depflags='-MMD -MF $(@:.o=.d) -MT $@' |
|
2415 |
+ _cflags_speed='-O2' |
|
2416 |
+ _cflags_size='-Os' |
|
2417 |
+ _flags_filter='filter_out -Wdisabled-optimization' |
|
2418 |
+ elif $_cc -v 2>&1 | grep -q Open64; then |
|
2419 |
+ _type=open64 |
|
2420 |
+ _ident=$($_cc -v 2>&1 | head -n1 | tr -d :) |
|
2421 |
+ _depflags='-MMD -MF $(@:.o=.d) -MT $@' |
|
2422 |
+ _cflags_speed='-O2' |
|
2423 |
+ _cflags_size='-Os' |
|
2424 |
+ _flags_filter='filter_out -Wdisabled-optimization|-Wtype-limits|-fno-signed-zeros' |
|
2425 |
+ elif $_cc -V 2>&1 | grep -q Portland; then |
|
2426 |
+ _type=pgi |
|
2427 |
+ _ident="PGI $($_cc -V 2>&1 | awk '/^pgcc/ { print $2; exit }')" |
|
2428 |
+ opt_common='-alias=ansi -Mlre -Mpre' |
|
2429 |
+ _cflags_speed="-O3 -Mautoinline -Munroll=c:4 $opt_common" |
|
2430 |
+ _cflags_size="-O2 -Munroll=c:1 $opt_common" |
|
2431 |
+ _cflags_noopt="-O1" |
|
2432 |
+ _flags_filter=pgi_flags |
|
2340 | 2433 |
fi |
2341 |
- cflags_speed='-O3' |
|
2342 |
- cflags_size='-Os' |
|
2343 |
-elif $cc --version 2>/dev/null | grep -q Intel; then |
|
2344 |
- cc_type=icc |
|
2345 |
- cc_ident=$($cc --version | head -n1) |
|
2346 |
- CC_DEPFLAGS='-MMD' |
|
2347 |
- AS_DEPFLAGS='-MMD' |
|
2348 |
- cflags_speed='-O3' |
|
2349 |
- cflags_size='-Os' |
|
2350 |
- cflags_noopt='-O1' |
|
2351 |
-elif $cc -v 2>&1 | grep -q xlc; then |
|
2352 |
- cc_type=xlc |
|
2353 |
- cc_ident=$($cc -qversion 2>/dev/null | head -n1) |
|
2354 |
- cflags_speed='-O5' |
|
2355 |
- cflags_size='-O5 -qcompact' |
|
2356 |
-elif $cc -V 2>/dev/null | grep -q Compaq; then |
|
2357 |
- cc_type=ccc |
|
2358 |
- cc_ident=$($cc -V | head -n1 | cut -d' ' -f1-3) |
|
2359 |
- DEPFLAGS='$(CPPFLAGS) $(CFLAGS) -M' |
|
2360 |
- debuglevel=3 |
|
2361 |
- add_ldflags -Wl,-z,now # calls to libots crash without this |
|
2362 |
- cflags_speed='-fast' |
|
2363 |
- cflags_size='-O1' |
|
2364 |
-elif $cc --vsn 2>/dev/null | grep -q "ARM C/C++ Compiler"; then |
|
2365 |
- test -d "$sysroot" || die "No valid sysroot specified." |
|
2366 |
- cc_type=armcc |
|
2367 |
- cc_ident=$($cc --vsn | head -n1) |
|
2368 |
- armcc_conf="$PWD/armcc.conf" |
|
2369 |
- $cc --arm_linux_configure \ |
|
2370 |
- --arm_linux_config_file="$armcc_conf" \ |
|
2371 |
- --configure_sysroot="$sysroot" \ |
|
2372 |
- --configure_cpp_headers="$sysinclude" >>$logfile 2>&1 || |
|
2373 |
- die "Error creating armcc configuration file." |
|
2374 |
- $cc --vsn | grep -q RVCT && armcc_opt=rvct || armcc_opt=armcc |
|
2375 |
- cc="$cc --arm_linux_config_file=$armcc_conf --translate_gcc" |
|
2376 |
- as_default="${cross_prefix}gcc" |
|
2377 |
- CC_DEPFLAGS='-MMD' |
|
2378 |
- AS_DEPFLAGS='-MMD' |
|
2379 |
- cflags_speed='-O3' |
|
2380 |
- cflags_size='-Os' |
|
2381 |
- asflags_filter="filter_out -W${armcc_opt}*" |
|
2382 |
-elif $cc -version 2>/dev/null | grep -q TMS470; then |
|
2383 |
- cc_type=tms470 |
|
2384 |
- cc_ident=$($cc -version | head -n1 | tr -s ' ') |
|
2385 |
- cc="$cc --gcc --abi=eabi -me" |
|
2386 |
- CC_O='-fe=$@' |
|
2387 |
- as_default="${cross_prefix}gcc" |
|
2388 |
- ld_default="${cross_prefix}gcc" |
|
2389 |
- add_cflags -D__gnuc_va_list=va_list -D__USER_LABEL_PREFIX__= |
|
2390 |
- CC_DEPFLAGS='-ppa -ppd=$(@:.o=.d)' |
|
2391 |
- AS_DEPFLAGS='-MMD' |
|
2392 |
- cflags_speed='-O3 -mf=5' |
|
2393 |
- cflags_size='-O3 -mf=2' |
|
2394 |
- cflags_filter=tms470_flags |
|
2395 |
-elif $cc -v 2>&1 | grep -q clang; then |
|
2396 |
- cc_type=clang |
|
2397 |
- cc_ident=$($cc --version | head -n1) |
|
2398 |
- CC_DEPFLAGS='-MMD' |
|
2399 |
- AS_DEPFLAGS='-MMD' |
|
2400 |
- cflags_speed='-O3' |
|
2401 |
- cflags_size='-Os' |
|
2402 |
-elif $cc -V 2>&1 | grep -q Sun; then |
|
2403 |
- cc_type=suncc |
|
2404 |
- cc_ident=$($cc -V 2>&1 | head -n1 | cut -d' ' -f 2-) |
|
2405 |
- DEPEND_CMD='$(DEPCC) $(DEPFLAGS) $< | sed -e "1s,^.*: ,$@: ," -e "\$$!s,\$$, \\\," -e "1!s,^.*: , ," > $(@:.o=.d)' |
|
2406 |
- DEPFLAGS='$(CPPFLAGS) $(CFLAGS) -xM1' |
|
2407 |
- add_ldflags -xc99 |
|
2408 |
- cflags_speed='-O5' |
|
2409 |
- cflags_size='-O5 -xspace' |
|
2410 |
- cflags_filter=suncc_flags |
|
2411 |
-elif $cc -v 2>&1 | grep -q 'PathScale\|Path64'; then |
|
2412 |
- cc_type=pathscale |
|
2413 |
- cc_ident=$($cc -v 2>&1 | head -n1 | tr -d :) |
|
2414 |
- CC_DEPFLAGS='-MMD -MF $(@:.o=.d) -MT $@' |
|
2415 |
- AS_DEPFLAGS='-MMD -MF $(@:.o=.d) -MT $@' |
|
2416 |
- cflags_speed='-O2' |
|
2417 |
- cflags_size='-Os' |
|
2418 |
- cflags_filter='filter_out -Wdisabled-optimization' |
|
2419 |
-elif $cc -v 2>&1 | grep -q Open64; then |
|
2420 |
- cc_type=open64 |
|
2421 |
- cc_ident=$($cc -v 2>&1 | head -n1 | tr -d :) |
|
2422 |
- CC_DEPFLAGS='-MMD -MF $(@:.o=.d) -MT $@' |
|
2423 |
- AS_DEPFLAGS='-MMD -MF $(@:.o=.d) -MT $@' |
|
2424 |
- cflags_speed='-O2' |
|
2425 |
- cflags_size='-Os' |
|
2426 |
- cflags_filter='filter_out -Wdisabled-optimization|-Wtype-limits|-fno-signed-zeros' |
|
2427 |
-elif $cc -V 2>&1 | grep -q Portland; then |
|
2428 |
- cc_type=pgi |
|
2429 |
- cc_ident="PGI $($cc -V 2>&1 | awk '/^pgcc/ { print $2; exit }')" |
|
2430 |
- opt_common='-alias=ansi -Mlre -Mpre' |
|
2431 |
- cflags_speed="-O3 -Mautoinline -Munroll=c:4 $opt_common" |
|
2432 |
- cflags_size="-O2 -Munroll=c:1 $opt_common" |
|
2433 |
- cflags_noopt="-O1" |
|
2434 |
- cflags_filter=pgi_flags |
|
2435 |
-fi |
|
2434 |
+ |
|
2435 |
+ eval ${pfx}_type=\$_type |
|
2436 |
+ eval ${pfx}_ident=\$_ident |
|
2437 |
+} |
|
2438 |
+ |
|
2439 |
+set_ccvars(){ |
|
2440 |
+ eval ${1}_O=\${_cc_o-\${${1}_O}} |
|
2441 |
+ |
|
2442 |
+ if [ -n "$_depflags" ]; then |
|
2443 |
+ eval ${1}_DEPFLAGS=\$_depflags |
|
2444 |
+ else |
|
2445 |
+ eval ${1}DEP=\${_DEPCMD:-\$DEPCMD} |
|
2446 |
+ eval ${1}DEP_FLAGS=\${_DEPFLAGS:-\$DEPFLAGS} |
|
2447 |
+ eval DEP${1}FLAGS=\$_flags |
|
2448 |
+ fi |
|
2449 |
+} |
|
2450 |
+ |
|
2451 |
+probe_cc cc "$cc" |
|
2452 |
+cflags_filter=$_flags_filter |
|
2453 |
+cflags_speed=$_cflags_speed |
|
2454 |
+cflags_size=$_cflags_size |
|
2455 |
+cflags_noopt=$_cflags_noopt |
|
2456 |
+add_cflags $_flags $_cflags |
|
2457 |
+cc_ldflags=$_ldflags |
|
2458 |
+set_ccvars CC |
|
2459 |
+ |
|
2460 |
+probe_cc hostcc "$host_cc" |
|
2461 |
+host_cflags_filter=$_flags_filter |
|
2462 |
+host_ldflags_filter=$_flags_filter |
|
2463 |
+add_host_cflags $_flags $_cflags |
|
2464 |
+add_host_ldflags $_flags $_ldflags |
|
2465 |
+set_ccvars HOSTCC |
|
2436 | 2466 |
|
2437 | 2467 |
test -n "$cc_type" && enable $cc_type || |
2438 | 2468 |
warn "Unknown C compiler $cc, unable to select optimal CFLAGS" |
... | ... |
@@ -2442,9 +2477,23 @@ test -n "$cc_type" && enable $cc_type || |
2442 | 2442 |
: ${ld_default:=$cc} |
2443 | 2443 |
set_default ar as dep_cc ld |
2444 | 2444 |
|
2445 |
-test -n "$CC_DEPFLAGS" || CCDEP=$DEPEND_CMD |
|
2446 |
-test -n "$CXX_DEPFLAGS" || CXXDEP=$DEPEND_CMD |
|
2447 |
-test -n "$AS_DEPFLAGS" || ASDEP=$DEPEND_CMD |
|
2445 |
+probe_cc as "$as" |
|
2446 |
+asflags_filter=$_flags_filter |
|
2447 |
+add_asflags $_flags $_cflags |
|
2448 |
+set_ccvars AS |
|
2449 |
+ |
|
2450 |
+probe_cc ld "$ld" |
|
2451 |
+ldflags_filter=$_flags_filter |
|
2452 |
+add_ldflags $_flags $_ldflags |
|
2453 |
+test "$cc_type" != "$ld_type" && add_ldflags $cc_ldflags |
|
2454 |
+LD_O=${_cc_o-$LD_O} |
|
2455 |
+ |
|
2456 |
+if [ -z "$CC_DEPFLAGS" ] && [ "$dep_cc" != "$cc" ]; then |
|
2457 |
+ probe_cc depcc "$dep_cc" |
|
2458 |
+ CCDEP=${_DEPCMD:-$DEPCMD} |
|
2459 |
+ CCDEP_FLAGS=${_DEPFLAGS:=$DEPFLAGS} |
|
2460 |
+ DEPCCFLAGS=$_flags |
|
2461 |
+fi |
|
2448 | 2462 |
|
2449 | 2463 |
add_cflags $extra_cflags |
2450 | 2464 |
add_cxxflags $extra_cxxflags |
... | ... |
@@ -3140,6 +3189,7 @@ EOF |
3140 | 3140 |
check_yasm "pextrd [eax], xmm0, 1" && enable yasm || |
3141 | 3141 |
die "yasm not found, use --disable-yasm for a crippled build" |
3142 | 3142 |
check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx |
3143 |
+ check_yasm "vfmaddps ymm0, ymm1, ymm2, ymm3" || disable fma4 |
|
3143 | 3144 |
fi |
3144 | 3145 |
|
3145 | 3146 |
case "$cpu" in |
... | ... |
@@ -3673,6 +3723,7 @@ if enabled x86; then |
3673 | 3673 |
echo "SSE enabled ${sse-no}" |
3674 | 3674 |
echo "SSSE3 enabled ${ssse3-no}" |
3675 | 3675 |
echo "AVX enabled ${avx-no}" |
3676 |
+ echo "FMA4 enabled ${fma4-no}" |
|
3676 | 3677 |
echo "CMOV enabled ${cmov-no}" |
3677 | 3678 |
echo "CMOV is fast ${fast_cmov-no}" |
3678 | 3679 |
echo "EBX available ${ebx_available-no}" |
... | ... |
@@ -3814,6 +3865,9 @@ CXX=$cxx |
3814 | 3814 |
AS=$as |
3815 | 3815 |
LD=$ld |
3816 | 3816 |
DEPCC=$dep_cc |
3817 |
+DEPCCFLAGS=$DEPCCFLAGS \$(CPPFLAGS) |
|
3818 |
+DEPAS=$as |
|
3819 |
+DEPASFLAGS=$DEPASFLAGS \$(CPPFLAGS) |
|
3817 | 3820 |
YASM=$yasmexe |
3818 | 3821 |
YASMDEP=$yasmexe |
3819 | 3822 |
AR=$ar |
... | ... |
@@ -3825,9 +3879,10 @@ CPPFLAGS=$CPPFLAGS |
3825 | 3825 |
CFLAGS=$CFLAGS |
3826 | 3826 |
CXXFLAGS=$CXXFLAGS |
3827 | 3827 |
ASFLAGS=$ASFLAGS |
3828 |
-AS_O=$CC_O |
|
3828 |
+AS_O=$AS_O |
|
3829 | 3829 |
CC_O=$CC_O |
3830 | 3830 |
CXX_O=$CXX_O |
3831 |
+LD_O=$LD_O |
|
3831 | 3832 |
LDFLAGS=$LDFLAGS |
3832 | 3833 |
FFSERVERLDFLAGS=$FFSERVERLDFLAGS |
3833 | 3834 |
SHFLAGS=$SHFLAGS |
... | ... |
@@ -3842,10 +3897,11 @@ SLIBPREF=$SLIBPREF |
3842 | 3842 |
SLIBSUF=$SLIBSUF |
3843 | 3843 |
EXESUF=$EXESUF |
3844 | 3844 |
EXTRA_VERSION=$extra_version |
3845 |
-DEPFLAGS=$DEPFLAGS |
|
3846 | 3845 |
CCDEP=$CCDEP |
3847 | 3846 |
CXXDEP=$CXXDEP |
3847 |
+CCDEP_FLAGS=$CCDEP_FLAGS |
|
3848 | 3848 |
ASDEP=$ASDEP |
3849 |
+ASDEP_FLAGS=$ASDEP_FLAGS |
|
3849 | 3850 |
CC_DEPFLAGS=$CC_DEPFLAGS |
3850 | 3851 |
AS_DEPFLAGS=$AS_DEPFLAGS |
3851 | 3852 |
HOSTCC=$host_cc |
... | ... |
@@ -3853,6 +3909,12 @@ HOSTCFLAGS=$host_cflags |
3853 | 3853 |
HOSTEXESUF=$HOSTEXESUF |
3854 | 3854 |
HOSTLDFLAGS=$host_ldflags |
3855 | 3855 |
HOSTLIBS=$host_libs |
3856 |
+DEPHOSTCC=$host_cc |
|
3857 |
+DEPHOSTCCFLAGS=$DEPHOSTCCFLAGS \$(HOSTCCFLAGS) |
|
3858 |
+HOSTCCDEP=$HOSTCCDEP |
|
3859 |
+HOSTCCDEP_FLAGS=$HOSTCCDEP_FLAGS |
|
3860 |
+HOSTCC_DEPFLAGS=$HOSTCC_DEPFLAGS |
|
3861 |
+HOSTCC_O=$HOSTCC_O |
|
3856 | 3862 |
TARGET_EXEC=$target_exec |
3857 | 3863 |
TARGET_PATH=$target_path |
3858 | 3864 |
SDL_LIBS=$sdl_libs |
... | ... |
@@ -28,8 +28,6 @@ doc/%.txt: doc/%.texi |
28 | 28 |
$(Q)$(TEXIDEP) |
29 | 29 |
$(M)makeinfo --force --no-headers -o $@ $< 2>/dev/null |
30 | 30 |
|
31 |
-doc/print_options.o: libavformat/options_table.h libavcodec/options_table.h |
|
32 |
- |
|
33 | 31 |
GENTEXI = format codec |
34 | 32 |
GENTEXI := $(GENTEXI:%=doc/avoptions_%.texi) |
35 | 33 |
|
... | ... |
@@ -32,7 +32,7 @@ OBJS = allcodecs.o \ |
32 | 32 |
utils.o \ |
33 | 33 |
|
34 | 34 |
# parts needed for many different codecs |
35 |
-OBJS-$(CONFIG_AANDCT) += aandcttab.o |
|
35 |
+OBJS-$(CONFIG_AANDCTTABLES) += aandcttab.o |
|
36 | 36 |
OBJS-$(CONFIG_AC3DSP) += ac3dsp.o |
37 | 37 |
OBJS-$(CONFIG_CRYSTALHD) += crystalhd.o |
38 | 38 |
OBJS-$(CONFIG_ENCODERS) += faandct.o jfdctfst.o jfdctint.o |
... | ... |
@@ -200,6 +200,7 @@ static void lpc_prediction(int32_t *error_buffer, int32_t *buffer_out, |
200 | 200 |
int lpc_order, int lpc_quant) |
201 | 201 |
{ |
202 | 202 |
int i; |
203 |
+ int32_t *pred = buffer_out; |
|
203 | 204 |
|
204 | 205 |
/* first sample always copies */ |
205 | 206 |
*buffer_out = *error_buffer; |
... | ... |
@@ -223,37 +224,35 @@ static void lpc_prediction(int32_t *error_buffer, int32_t *buffer_out, |
223 | 223 |
} |
224 | 224 |
|
225 | 225 |
/* read warm-up samples */ |
226 |
- for (i = 0; i < lpc_order; i++) { |
|
227 |
- buffer_out[i + 1] = sign_extend(buffer_out[i] + error_buffer[i + 1], |
|
228 |
- bps); |
|
229 |
- } |
|
226 |
+ for (i = 1; i <= lpc_order; i++) |
|
227 |
+ buffer_out[i] = sign_extend(buffer_out[i - 1] + error_buffer[i], bps); |
|
230 | 228 |
|
231 | 229 |
/* NOTE: 4 and 8 are very common cases that could be optimized. */ |
232 | 230 |
|
233 |
- for (i = lpc_order; i < nb_samples - 1; i++) { |
|
231 |
+ for (; i < nb_samples; i++) { |
|
234 | 232 |
int j; |
235 | 233 |
int val = 0; |
236 |
- int error_val = error_buffer[i + 1]; |
|
234 |
+ int error_val = error_buffer[i]; |
|
237 | 235 |
int error_sign; |
238 |
- int d = buffer_out[i - lpc_order]; |
|
236 |
+ int d = *pred++; |
|
239 | 237 |
|
240 | 238 |
/* LPC prediction */ |
241 | 239 |
for (j = 0; j < lpc_order; j++) |
242 |
- val += (buffer_out[i - j] - d) * lpc_coefs[j]; |
|
240 |
+ val += (pred[j] - d) * lpc_coefs[j]; |
|
243 | 241 |
val = (val + (1 << (lpc_quant - 1))) >> lpc_quant; |
244 | 242 |
val += d + error_val; |
245 |
- buffer_out[i + 1] = sign_extend(val, bps); |
|
243 |
+ buffer_out[i] = sign_extend(val, bps); |
|
246 | 244 |
|
247 | 245 |
/* adapt LPC coefficients */ |
248 | 246 |
error_sign = sign_only(error_val); |
249 | 247 |
if (error_sign) { |
250 |
- for (j = lpc_order - 1; j >= 0 && error_val * error_sign > 0; j--) { |
|
248 |
+ for (j = 0; j < lpc_order && error_val * error_sign > 0; j++) { |
|
251 | 249 |
int sign; |
252 |
- val = d - buffer_out[i - j]; |
|
250 |
+ val = d - pred[j]; |
|
253 | 251 |
sign = sign_only(val) * error_sign; |
254 | 252 |
lpc_coefs[j] -= sign; |
255 | 253 |
val *= sign; |
256 |
- error_val -= (val >> lpc_quant) * (lpc_order - j); |
|
254 |
+ error_val -= (val >> lpc_quant) * (j + 1); |
|
257 | 255 |
} |
258 | 256 |
} |
259 | 257 |
} |
... | ... |
@@ -356,7 +355,7 @@ static int decode_element(AVCodecContext *avctx, void *data, int ch_index, |
356 | 356 |
lpc_order[ch] = get_bits(&alac->gb, 5); |
357 | 357 |
|
358 | 358 |
/* read the predictor table */ |
359 |
- for (i = 0; i < lpc_order[ch]; i++) |
|
359 |
+ for (i = lpc_order[ch] - 1; i >= 0; i--) |
|
360 | 360 |
lpc_coefs[ch][i] = get_sbits(&alac->gb, 16); |
361 | 361 |
} |
362 | 362 |
|
... | ... |
@@ -477,16 +476,19 @@ static int alac_decode_frame(AVCodecContext *avctx, void *data, |
477 | 477 |
ALACContext *alac = avctx->priv_data; |
478 | 478 |
enum RawDataBlockType element; |
479 | 479 |
int channels; |
480 |
- int ch, ret; |
|
480 |
+ int ch, ret, got_end; |
|
481 | 481 |
|
482 | 482 |
init_get_bits(&alac->gb, avpkt->data, avpkt->size * 8); |
483 | 483 |
|
484 |
+ got_end = 0; |
|
484 | 485 |
alac->nb_samples = 0; |
485 | 486 |
ch = 0; |
486 |
- while (get_bits_left(&alac->gb)) { |
|
487 |
+ while (get_bits_left(&alac->gb) >= 3) { |
|
487 | 488 |
element = get_bits(&alac->gb, 3); |
488 |
- if (element == TYPE_END) |
|
489 |
+ if (element == TYPE_END) { |
|
490 |
+ got_end = 1; |
|
489 | 491 |
break; |
492 |
+ } |
|
490 | 493 |
if (element > TYPE_CPE && element != TYPE_LFE) { |
491 | 494 |
av_log(avctx, AV_LOG_ERROR, "syntax element unsupported: %d", element); |
492 | 495 |
return AVERROR_PATCHWELCOME; |
... | ... |
@@ -501,11 +503,15 @@ static int alac_decode_frame(AVCodecContext *avctx, void *data, |
501 | 501 |
ret = decode_element(avctx, data, |
502 | 502 |
alac_channel_layout_offsets[alac->channels - 1][ch], |
503 | 503 |
channels); |
504 |
- if (ret < 0) |
|
504 |
+ if (ret < 0 && get_bits_left(&alac->gb)) |
|
505 | 505 |
return ret; |
506 | 506 |
|
507 | 507 |
ch += channels; |
508 | 508 |
} |
509 |
+ if (!got_end) { |
|
510 |
+ av_log(avctx, AV_LOG_ERROR, "no end tag found. incomplete packet.\n"); |
|
511 |
+ return AVERROR_INVALIDDATA; |
|
512 |
+ } |
|
509 | 513 |
|
510 | 514 |
if (avpkt->size * 8 - get_bits_count(&alac->gb) > 8) { |
511 | 515 |
av_log(avctx, AV_LOG_ERROR, "Error : %d bits left\n", |
... | ... |
@@ -298,8 +298,8 @@ static int tscc2_decode_frame(AVCodecContext *avctx, void *data, |
298 | 298 |
if (!size) { |
299 | 299 |
int skip_row = 1, j, off = i * c->mb_width; |
300 | 300 |
for (j = 0; j < c->mb_width; j++) { |
301 |
- if (c->slice_quants[off + i] == 1 || |
|
302 |
- c->slice_quants[off + i] == 2) { |
|
301 |
+ if (c->slice_quants[off + j] == 1 || |
|
302 |
+ c->slice_quants[off + j] == 2) { |
|
303 | 303 |
skip_row = 0; |
304 | 304 |
break; |
305 | 305 |
} |
... | ... |
@@ -1158,12 +1158,7 @@ ALIGN 16 |
1158 | 1158 |
add src1q, 2*mmsize |
1159 | 1159 |
sub lenq, 2*mmsize |
1160 | 1160 |
jge .loop |
1161 |
-%if mmsize == 32 |
|
1162 |
- vzeroupper |
|
1163 |
- RET |
|
1164 |
-%else |
|
1165 | 1161 |
REP_RET |
1166 |
-%endif |
|
1167 | 1162 |
%endmacro |
1168 | 1163 |
|
1169 | 1164 |
INIT_XMM sse |
... | ... |
@@ -1193,12 +1188,7 @@ ALIGN 16 |
1193 | 1193 |
|
1194 | 1194 |
sub lenq, 2*mmsize |
1195 | 1195 |
jge .loop |
1196 |
-%if mmsize == 32 |
|
1197 |
- vzeroupper |
|
1198 |
- RET |
|
1199 |
-%else |
|
1200 | 1196 |
REP_RET |
1201 |
-%endif |
|
1202 | 1197 |
%endmacro |
1203 | 1198 |
|
1204 | 1199 |
INIT_XMM sse |
... | ... |
@@ -1243,10 +1233,6 @@ cglobal butterflies_float_interleave, 4,4,3, dst, src0, src1, len |
1243 | 1243 |
%endif |
1244 | 1244 |
add lenq, mmsize |
1245 | 1245 |
jl .loop |
1246 |
-%if mmsize == 32 |
|
1247 |
- vzeroupper |
|
1248 |
- RET |
|
1249 |
-%endif |
|
1250 | 1246 |
.end: |
1251 | 1247 |
REP_RET |
1252 | 1248 |
%endmacro |
... | ... |
@@ -750,9 +750,6 @@ section .text |
750 | 750 |
; The others pass args in registers and don't spill anything. |
751 | 751 |
cglobal fft_dispatch%2, 2,5,8, z, nbits |
752 | 752 |
FFT_DISPATCH fullsuffix, nbits |
753 |
-%if mmsize == 32 |
|
754 |
- vzeroupper |
|
755 |
-%endif |
|
756 | 753 |
RET |
757 | 754 |
%endmacro ; DECL_FFT |
758 | 755 |
|
... | ... |
@@ -958,9 +955,6 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i |
958 | 958 |
%if ARCH_X86_64 == 0 |
959 | 959 |
add esp, 12 |
960 | 960 |
%endif |
961 |
-%if mmsize == 32 |
|
962 |
- vzeroupper |
|
963 |
-%endif |
|
964 | 961 |
RET |
965 | 962 |
%endmacro |
966 | 963 |
|
... | ... |
@@ -36,6 +36,8 @@ void ff_four_imdct36_float_avx(float *out, float *buf, float *in, float *win, |
36 | 36 |
|
37 | 37 |
DECLARE_ALIGNED(16, static float, mdct_win_sse)[2][4][4*40]; |
38 | 38 |
|
39 |
+#if HAVE_INLINE_ASM |
|
40 |
+ |
|
39 | 41 |
#define MACS(rt, ra, rb) rt+=(ra)*(rb) |
40 | 42 |
#define MLSS(rt, ra, rb) rt-=(ra)*(rb) |
41 | 43 |
|
... | ... |
@@ -178,6 +180,7 @@ static void apply_window_mp3(float *in, float *win, int *unused, float *out, |
178 | 178 |
*out = sum; |
179 | 179 |
} |
180 | 180 |
|
181 |
+#endif /* HAVE_INLINE_ASM */ |
|
181 | 182 |
|
182 | 183 |
#define DECL_IMDCT_BLOCKS(CPU1, CPU2) \ |
183 | 184 |
static void imdct36_blocks_ ## CPU1(float *out, float *buf, float *in, \ |
... | ... |
@@ -241,9 +244,11 @@ void ff_mpadsp_init_mmx(MPADSPContext *s) |
241 | 241 |
} |
242 | 242 |
} |
243 | 243 |
|
244 |
+#if HAVE_INLINE_ASM |
|
244 | 245 |
if (mm_flags & AV_CPU_FLAG_SSE2) { |
245 | 246 |
s->apply_window_float = apply_window_mp3; |
246 | 247 |
} |
248 |
+#endif /* HAVE_INLINE_ASM */ |
|
247 | 249 |
#if HAVE_YASM |
248 | 250 |
if (0) { |
249 | 251 |
#if HAVE_AVX |
... | ... |
@@ -83,8 +83,7 @@ section .text align=16 |
83 | 83 |
|
84 | 84 |
; %1 = row or col (for rounding variable) |
85 | 85 |
; %2 = number of bits to shift at the end |
86 |
-; %3 = optimization |
|
87 |
-%macro IDCT_1D 3 |
|
86 |
+%macro IDCT_1D 2 |
|
88 | 87 |
; a0 = (W4 * row[0]) + (1 << (15 - 1)); |
89 | 88 |
; a1 = a0; |
90 | 89 |
; a2 = a0; |
... | ... |
@@ -235,8 +234,8 @@ section .text align=16 |
235 | 235 |
|
236 | 236 |
; void prores_idct_put_10_<opt>(uint8_t *pixels, int stride, |
237 | 237 |
; DCTELEM *block, const int16_t *qmat); |
238 |
-%macro idct_put_fn 2 |
|
239 |
-cglobal prores_idct_put_10_%1, 4, 4, %2 |
|
238 |
+%macro idct_put_fn 1 |
|
239 |
+cglobal prores_idct_put_10, 4, 4, %1 |
|
240 | 240 |
movsxd r1, r1d |
241 | 241 |
pxor m15, m15 ; zero |
242 | 242 |
|
... | ... |
@@ -252,7 +251,7 @@ cglobal prores_idct_put_10_%1, 4, 4, %2 |
252 | 252 |
pmullw m13,[r3+64] |
253 | 253 |
pmullw m12,[r3+96] |
254 | 254 |
|
255 |
- IDCT_1D row, 15, %1 |
|
255 |
+ IDCT_1D row, 15 |
|
256 | 256 |
|
257 | 257 |
; transpose for second part of IDCT |
258 | 258 |
TRANSPOSE8x8W 8, 0, 1, 2, 4, 11, 9, 10, 3 |
... | ... |
@@ -267,7 +266,7 @@ cglobal prores_idct_put_10_%1, 4, 4, %2 |
267 | 267 |
|
268 | 268 |
; for (i = 0; i < 8; i++) |
269 | 269 |
; idctSparseColAdd(dest + i, line_size, block + i); |
270 |
- IDCT_1D col, 18, %1 |
|
270 |
+ IDCT_1D col, 18 |
|
271 | 271 |
|
272 | 272 |
; clip/store |
273 | 273 |
mova m3, [pw_4] |
... | ... |
@@ -302,13 +301,27 @@ cglobal prores_idct_put_10_%1, 4, 4, %2 |
302 | 302 |
RET |
303 | 303 |
%endmacro |
304 | 304 |
|
305 |
-INIT_XMM |
|
306 |
-idct_put_fn sse2, 16 |
|
307 |
-INIT_XMM |
|
308 |
-idct_put_fn sse4, 16 |
|
305 |
+%macro SIGNEXTEND 2-3 ; dstlow, dsthigh, tmp |
|
306 |
+%if cpuflag(sse4) |
|
307 |
+ movhlps %2, %1 |
|
308 |
+ pmovsxwd %1, %1 |
|
309 |
+ pmovsxwd %2, %2 |
|
310 |
+%else ; sse2 |
|
311 |
+ pxor %3, %3 |
|
312 |
+ pcmpgtw %3, %1 |
|
313 |
+ mova %2, %1 |
|
314 |
+ punpcklwd %1, %3 |
|
315 |
+ punpckhwd %2, %3 |
|
316 |
+%endif |
|
317 |
+%endmacro |
|
318 |
+ |
|
319 |
+INIT_XMM sse2 |
|
320 |
+idct_put_fn 16 |
|
321 |
+INIT_XMM sse4 |
|
322 |
+idct_put_fn 16 |
|
309 | 323 |
%if HAVE_AVX |
310 |
-INIT_AVX |
|
311 |
-idct_put_fn avx, 16 |
|
324 |
+INIT_XMM avx |
|
325 |
+idct_put_fn 16 |
|
312 | 326 |
%endif |
313 | 327 |
|
314 | 328 |
%endif |
... | ... |
@@ -578,11 +578,44 @@ static void swap_samplerates(AVFilterGraph *graph) |
578 | 578 |
swap_samplerates_on_filter(graph->filters[i]); |
579 | 579 |
} |
580 | 580 |
|
581 |
+#define CH_CENTER_PAIR (AV_CH_FRONT_LEFT_OF_CENTER | AV_CH_FRONT_RIGHT_OF_CENTER) |
|
582 |
+#define CH_FRONT_PAIR (AV_CH_FRONT_LEFT | AV_CH_FRONT_RIGHT) |
|
583 |
+#define CH_STEREO_PAIR (AV_CH_STEREO_LEFT | AV_CH_STEREO_RIGHT) |
|
584 |
+#define CH_WIDE_PAIR (AV_CH_WIDE_LEFT | AV_CH_WIDE_RIGHT) |
|
585 |
+#define CH_SIDE_PAIR (AV_CH_SIDE_LEFT | AV_CH_SIDE_RIGHT) |
|
586 |
+#define CH_DIRECT_PAIR (AV_CH_SURROUND_DIRECT_LEFT | AV_CH_SURROUND_DIRECT_RIGHT) |
|
587 |
+#define CH_BACK_PAIR (AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT) |
|
588 |
+ |
|
589 |
+/* allowable substitutions for channel pairs when comparing layouts, |
|
590 |
+ * ordered by priority for both values */ |
|
591 |
+static const uint64_t ch_subst[][2] = { |
|
592 |
+ { CH_FRONT_PAIR, CH_CENTER_PAIR }, |
|
593 |
+ { CH_FRONT_PAIR, CH_WIDE_PAIR }, |
|
594 |
+ { CH_FRONT_PAIR, AV_CH_FRONT_CENTER }, |
|
595 |
+ { CH_CENTER_PAIR, CH_FRONT_PAIR }, |
|
596 |
+ { CH_CENTER_PAIR, CH_WIDE_PAIR }, |
|
597 |
+ { CH_CENTER_PAIR, AV_CH_FRONT_CENTER }, |
|
598 |
+ { CH_WIDE_PAIR, CH_FRONT_PAIR }, |
|
599 |
+ { CH_WIDE_PAIR, CH_CENTER_PAIR }, |
|
600 |
+ { CH_WIDE_PAIR, AV_CH_FRONT_CENTER }, |
|
601 |
+ { AV_CH_FRONT_CENTER, CH_FRONT_PAIR }, |
|
602 |
+ { AV_CH_FRONT_CENTER, CH_CENTER_PAIR }, |
|
603 |
+ { AV_CH_FRONT_CENTER, CH_WIDE_PAIR }, |
|
604 |
+ { CH_SIDE_PAIR, CH_DIRECT_PAIR }, |
|
605 |
+ { CH_SIDE_PAIR, CH_BACK_PAIR }, |
|
606 |
+ { CH_SIDE_PAIR, AV_CH_BACK_CENTER }, |
|
607 |
+ { CH_BACK_PAIR, CH_DIRECT_PAIR }, |
|
608 |
+ { CH_BACK_PAIR, CH_SIDE_PAIR }, |
|
609 |
+ { CH_BACK_PAIR, AV_CH_BACK_CENTER }, |
|
610 |
+ { AV_CH_BACK_CENTER, CH_BACK_PAIR }, |
|
611 |
+ { AV_CH_BACK_CENTER, CH_DIRECT_PAIR }, |
|
612 |
+ { AV_CH_BACK_CENTER, CH_SIDE_PAIR }, |
|
613 |
+}; |
|
614 |
+ |
|
581 | 615 |
static void swap_channel_layouts_on_filter(AVFilterContext *filter) |
582 | 616 |
{ |
583 | 617 |
AVFilterLink *link = NULL; |
584 |
- uint64_t chlayout; |
|
585 |
- int i, j; |
|
618 |
+ int i, j, k; |
|
586 | 619 |
|
587 | 620 |
for (i = 0; i < filter->nb_inputs; i++) { |
588 | 621 |
link = filter->inputs[i]; |
... | ... |
@@ -594,27 +627,55 @@ static void swap_channel_layouts_on_filter(AVFilterContext *filter) |
594 | 594 |
if (i == filter->nb_inputs) |
595 | 595 |
return; |
596 | 596 |
|
597 |
- chlayout = link->out_channel_layouts->channel_layouts[0]; |
|
598 |
- |
|
599 | 597 |
for (i = 0; i < filter->nb_outputs; i++) { |
600 | 598 |
AVFilterLink *outlink = filter->outputs[i]; |
601 |
- int best_idx, best_score = INT_MIN; |
|
599 |
+ int best_idx, best_score = INT_MIN, best_count_diff = INT_MAX; |
|
602 | 600 |
|
603 | 601 |
if (outlink->type != AVMEDIA_TYPE_AUDIO || |
604 | 602 |
outlink->in_channel_layouts->nb_channel_layouts < 2) |
605 | 603 |
continue; |
606 | 604 |
|
607 | 605 |
for (j = 0; j < outlink->in_channel_layouts->nb_channel_layouts; j++) { |
606 |
+ uint64_t in_chlayout = link->out_channel_layouts->channel_layouts[0]; |
|
608 | 607 |
uint64_t out_chlayout = outlink->in_channel_layouts->channel_layouts[j]; |
609 |
- int matched_channels = av_get_channel_layout_nb_channels(chlayout & |
|
610 |
- out_chlayout); |
|
611 |
- int extra_channels = av_get_channel_layout_nb_channels(out_chlayout & |
|
612 |
- (~chlayout)); |
|
613 |
- int score = matched_channels - extra_channels; |
|
608 |
+ int in_channels = av_get_channel_layout_nb_channels(in_chlayout); |
|
609 |
+ int out_channels = av_get_channel_layout_nb_channels(out_chlayout); |
|
610 |
+ int count_diff = out_channels - in_channels; |
|
611 |
+ int matched_channels, extra_channels; |
|
612 |
+ int score = 0; |
|
613 |
+ |
|
614 |
+ /* channel substitution */ |
|
615 |
+ for (k = 0; k < FF_ARRAY_ELEMS(ch_subst); k++) { |
|
616 |
+ uint64_t cmp0 = ch_subst[k][0]; |
|
617 |
+ uint64_t cmp1 = ch_subst[k][1]; |
|
618 |
+ if (( in_chlayout & cmp0) && (!(out_chlayout & cmp0)) && |
|
619 |
+ (out_chlayout & cmp1) && (!( in_chlayout & cmp1))) { |
|
620 |
+ in_chlayout &= ~cmp0; |
|
621 |
+ out_chlayout &= ~cmp1; |
|
622 |
+ /* add score for channel match, minus a deduction for |
|
623 |
+ having to do the substitution */ |
|
624 |
+ score += 10 * av_get_channel_layout_nb_channels(cmp1) - 2; |
|
625 |
+ } |
|
626 |
+ } |
|
614 | 627 |
|
615 |
- if (score > best_score) { |
|
628 |
+ /* no penalty for LFE channel mismatch */ |
|
629 |
+ if ( (in_chlayout & AV_CH_LOW_FREQUENCY) && |
|
630 |
+ (out_chlayout & AV_CH_LOW_FREQUENCY)) |
|
631 |
+ score += 10; |
|
632 |
+ in_chlayout &= ~AV_CH_LOW_FREQUENCY; |
|
633 |
+ out_chlayout &= ~AV_CH_LOW_FREQUENCY; |
|
634 |
+ |
|
635 |
+ matched_channels = av_get_channel_layout_nb_channels(in_chlayout & |
|
636 |
+ out_chlayout); |
|
637 |
+ extra_channels = av_get_channel_layout_nb_channels(out_chlayout & |
|
638 |
+ (~in_chlayout)); |
|
639 |
+ score += 10 * matched_channels - 5 * extra_channels; |
|
640 |
+ |
|
641 |
+ if (score > best_score || |
|
642 |
+ (count_diff < best_count_diff && score == best_score)) { |
|
616 | 643 |
best_score = score; |
617 | 644 |
best_idx = j; |
645 |
+ best_count_diff = count_diff; |
|
618 | 646 |
} |
619 | 647 |
} |
620 | 648 |
FFSWAP(uint64_t, outlink->in_channel_layouts->channel_layouts[0], |
... | ... |
@@ -515,6 +515,12 @@ static int gen_pong(URLContext *s, RTMPContext *rt, RTMPPacket *ppkt) |
515 | 515 |
uint8_t *p; |
516 | 516 |
int ret; |
517 | 517 |
|
518 |
+ if (ppkt->data_size < 6) { |
|
519 |
+ av_log(s, AV_LOG_ERROR, "Too short ping packet (%d)\n", |
|
520 |
+ ppkt->data_size); |
|
521 |
+ return AVERROR_INVALIDDATA; |
|
522 |
+ } |
|
523 |
+ |
|
518 | 524 |
if ((ret = ff_rtmp_packet_create(&pkt, RTMP_NETWORK_CHANNEL, RTMP_PT_PING, |
519 | 525 |
ppkt->timestamp + 1, 6)) < 0) |
520 | 526 |
return ret; |
... | ... |
@@ -885,9 +891,9 @@ static int handle_chunk_size(URLContext *s, RTMPPacket *pkt) |
885 | 885 |
RTMPContext *rt = s->priv_data; |
886 | 886 |
int ret; |
887 | 887 |
|
888 |
- if (pkt->data_size != 4) { |
|
888 |
+ if (pkt->data_size < 4) { |
|
889 | 889 |
av_log(s, AV_LOG_ERROR, |
890 |
- "Chunk size change packet is not 4 bytes long (%d)\n", |
|
890 |
+ "Too short chunk size change packet (%d)\n", |
|
891 | 891 |
pkt->data_size); |
892 | 892 |
return AVERROR_INVALIDDATA; |
893 | 893 |
} |
... | ... |
@@ -913,6 +919,12 @@ static int handle_ping(URLContext *s, RTMPPacket *pkt) |
913 | 913 |
RTMPContext *rt = s->priv_data; |
914 | 914 |
int t, ret; |
915 | 915 |
|
916 |
+ if (pkt->data_size < 2) { |
|
917 |
+ av_log(s, AV_LOG_ERROR, "Too short ping packet (%d)\n", |
|
918 |
+ pkt->data_size); |
|
919 |
+ return AVERROR_INVALIDDATA; |
|
920 |
+ } |
|
921 |
+ |
|
916 | 922 |
t = AV_RB16(pkt->data); |
917 | 923 |
if (t == 6) { |
918 | 924 |
if ((ret = gen_pong(s, rt, pkt)) < 0) |
... | ... |
@@ -950,6 +962,13 @@ static int handle_server_bw(URLContext *s, RTMPPacket *pkt) |
950 | 950 |
{ |
951 | 951 |
RTMPContext *rt = s->priv_data; |
952 | 952 |
|
953 |
+ if (pkt->data_size < 4) { |
|
954 |
+ av_log(s, AV_LOG_ERROR, |
|
955 |
+ "Too short server bandwidth report packet (%d)\n", |
|
956 |
+ pkt->data_size); |
|
957 |
+ return AVERROR_INVALIDDATA; |
|
958 |
+ } |
|
959 |
+ |
|
953 | 960 |
rt->server_bw = AV_RB32(pkt->data); |
954 | 961 |
if (rt->server_bw <= 0) { |
955 | 962 |
av_log(s, AV_LOG_ERROR, "Incorrect server bandwidth %d\n", |
... | ... |
@@ -246,9 +246,10 @@ static int handle_buffered_output(AVAudioResampleContext *avr, |
246 | 246 |
return 0; |
247 | 247 |
} |
248 | 248 |
|
249 |
-int avresample_convert(AVAudioResampleContext *avr, void **output, |
|
250 |
- int out_plane_size, int out_samples, void **input, |
|
251 |
- int in_plane_size, int in_samples) |
|
249 |
+int attribute_align_arg avresample_convert(AVAudioResampleContext *avr, |
|
250 |
+ void **output, int out_plane_size, |
|
251 |
+ int out_samples, void **input, |
|
252 |
+ int in_plane_size, int in_samples) |
|
252 | 253 |
{ |
253 | 254 |
AudioData input_buffer; |
254 | 255 |
AudioData output_buffer; |
... | ... |
@@ -145,12 +145,7 @@ cglobal conv_s32_to_flt, 3,3,3, dst, src, len |
145 | 145 |
mova [dstq+lenq+mmsize], m2 |
146 | 146 |
add lenq, mmsize*2 |
147 | 147 |
jl .loop |
148 |
-%if mmsize == 32 |
|
149 |
- vzeroupper |
|
150 |
- RET |
|
151 |
-%else |
|
152 | 148 |
REP_RET |
153 |
-%endif |
|
154 | 149 |
%endmacro |
155 | 150 |
|
156 | 151 |
INIT_XMM sse2 |
... | ... |
@@ -218,12 +213,7 @@ cglobal conv_flt_to_s32, 3,3,5, dst, src, len |
218 | 218 |
mova [dstq+lenq+3*mmsize], m3 |
219 | 219 |
add lenq, mmsize*4 |
220 | 220 |
jl .loop |
221 |
-%if mmsize == 32 |
|
222 |
- vzeroupper |
|
223 |
- RET |
|
224 |
-%else |
|
225 | 221 |
REP_RET |
226 |
-%endif |
|
227 | 222 |
%endmacro |
228 | 223 |
|
229 | 224 |
INIT_XMM sse2 |
... | ... |
@@ -51,12 +51,7 @@ cglobal mix_2_to_1_fltp_flt, 3,4,6, src, matrix, len, src1 |
51 | 51 |
add srcq, mmsize*2 |
52 | 52 |
sub lend, mmsize*2/4 |
53 | 53 |
jg .loop |
54 |
-%if mmsize == 32 |
|
55 |
- vzeroupper |
|
56 |
- RET |
|
57 |
-%else |
|
58 | 54 |
REP_RET |
59 |
-%endif |
|
60 | 55 |
%endmacro |
61 | 56 |
|
62 | 57 |
INIT_XMM sse |
... | ... |
@@ -175,12 +170,7 @@ cglobal mix_1_to_2_fltp_flt, 3,5,4, src0, matrix0, len, src1, matrix1 |
175 | 175 |
add src0q, mmsize |
176 | 176 |
sub lend, mmsize/4 |
177 | 177 |
jg .loop |
178 |
-%if mmsize == 32 |
|
179 |
- vzeroupper |
|
180 |
- RET |
|
181 |
-%else |
|
182 | 178 |
REP_RET |
183 |
-%endif |
|
184 | 179 |
%endmacro |
185 | 180 |
|
186 | 181 |
INIT_XMM sse |
... | ... |
@@ -236,3 +226,296 @@ MIX_1_TO_2_S16P_FLT |
236 | 236 |
INIT_XMM avx |
237 | 237 |
MIX_1_TO_2_S16P_FLT |
238 | 238 |
%endif |
239 |
+ |
|
240 |
+;----------------------------------------------------------------------------- |
|
241 |
+; void ff_mix_3_8_to_1_2_fltp/s16p_flt(float/int16_t **src, float **matrix, |
|
242 |
+; int len, int out_ch, int in_ch); |
|
243 |
+;----------------------------------------------------------------------------- |
|
244 |
+ |
|
245 |
+%macro MIX_3_8_TO_1_2_FLT 3 ; %1 = in channels, %2 = out channels, %3 = s16p or fltp |
|
246 |
+; define some names to make the code clearer |
|
247 |
+%assign in_channels %1 |
|
248 |
+%assign out_channels %2 |
|
249 |
+%assign stereo out_channels - 1 |
|
250 |
+%ifidn %3, s16p |
|
251 |
+ %assign is_s16 1 |
|
252 |
+%else |
|
253 |
+ %assign is_s16 0 |
|
254 |
+%endif |
|
255 |
+ |
|
256 |
+; determine how many matrix elements must go on the stack vs. mmregs |
|
257 |
+%assign matrix_elements in_channels * out_channels |
|
258 |
+%if is_s16 |
|
259 |
+ %if stereo |
|
260 |
+ %assign needed_mmregs 7 |
|
261 |
+ %else |
|
262 |
+ %assign needed_mmregs 5 |
|
263 |
+ %endif |
|
264 |
+%else |
|
265 |
+ %if stereo |
|
266 |
+ %assign needed_mmregs 4 |
|
267 |
+ %else |
|
268 |
+ %assign needed_mmregs 3 |
|
269 |
+ %endif |
|
270 |
+%endif |
|
271 |
+%assign matrix_elements_mm num_mmregs - needed_mmregs |
|
272 |
+%if matrix_elements < matrix_elements_mm |
|
273 |
+ %assign matrix_elements_mm matrix_elements |
|
274 |
+%endif |
|
275 |
+%if matrix_elements_mm < matrix_elements |
|
276 |
+ %assign matrix_elements_stack matrix_elements - matrix_elements_mm |
|
277 |
+%else |
|
278 |
+ %assign matrix_elements_stack 0 |
|
279 |
+%endif |
|
280 |
+ |
|
281 |
+cglobal mix_%1_to_%2_%3_flt, 3,in_channels+2,needed_mmregs+matrix_elements_mm, src0, src1, len, src2, src3, src4, src5, src6, src7 |
|
282 |
+ |
|
283 |
+; get aligned stack space if needed |
|
284 |
+%if matrix_elements_stack > 0 |
|
285 |
+ %if mmsize == 32 |
|
286 |
+ %assign bkpreg %1 + 1 |
|
287 |
+ %define bkpq r %+ bkpreg %+ q |
|
288 |
+ mov bkpq, rsp |
|
289 |
+ and rsp, ~(mmsize-1) |
|
290 |
+ sub rsp, matrix_elements_stack * mmsize |
|
291 |
+ %else |
|
292 |
+ %assign pad matrix_elements_stack * mmsize + (mmsize - gprsize) - (stack_offset & (mmsize - gprsize)) |
|
293 |
+ SUB rsp, pad |
|
294 |
+ %endif |
|
295 |
+%endif |
|
296 |
+ |
|
297 |
+; load matrix pointers |
|
298 |
+%define matrix0q r1q |
|
299 |
+%define matrix1q r3q |
|
300 |
+%if stereo |
|
301 |
+ mov matrix1q, [matrix0q+gprsize] |
|
302 |
+%endif |
|
303 |
+ mov matrix0q, [matrix0q] |
|
304 |
+ |
|
305 |
+; define matrix coeff names |
|
306 |
+%assign %%i 0 |
|
307 |
+%assign %%j needed_mmregs |
|
308 |
+%rep in_channels |
|
309 |
+ %if %%i >= matrix_elements_mm |
|
310 |
+ CAT_XDEFINE mx_stack_0_, %%i, 1 |
|
311 |
+ CAT_XDEFINE mx_0_, %%i, [rsp+(%%i-matrix_elements_mm)*mmsize] |
|
312 |
+ %else |
|
313 |
+ CAT_XDEFINE mx_stack_0_, %%i, 0 |
|
314 |
+ CAT_XDEFINE mx_0_, %%i, m %+ %%j |
|
315 |
+ %assign %%j %%j+1 |
|
316 |
+ %endif |
|
317 |
+ %assign %%i %%i+1 |
|
318 |
+%endrep |
|
319 |
+%if stereo |
|
320 |
+%assign %%i 0 |
|
321 |
+%rep in_channels |
|
322 |
+ %if in_channels + %%i >= matrix_elements_mm |
|
323 |
+ CAT_XDEFINE mx_stack_1_, %%i, 1 |
|
324 |
+ CAT_XDEFINE mx_1_, %%i, [rsp+(in_channels+%%i-matrix_elements_mm)*mmsize] |
|
325 |
+ %else |
|
326 |
+ CAT_XDEFINE mx_stack_1_, %%i, 0 |
|
327 |
+ CAT_XDEFINE mx_1_, %%i, m %+ %%j |
|
328 |
+ %assign %%j %%j+1 |
|
329 |
+ %endif |
|
330 |
+ %assign %%i %%i+1 |
|
331 |
+%endrep |
|
332 |
+%endif |
|
333 |
+ |
|
334 |
+; load/splat matrix coeffs |
|
335 |
+%assign %%i 0 |
|
336 |
+%rep in_channels |
|
337 |
+ %if mx_stack_0_ %+ %%i |
|
338 |
+ VBROADCASTSS m0, [matrix0q+4*%%i] |
|
339 |
+ mova mx_0_ %+ %%i, m0 |
|
340 |
+ %else |
|
341 |
+ VBROADCASTSS mx_0_ %+ %%i, [matrix0q+4*%%i] |
|
342 |
+ %endif |
|
343 |
+ %if stereo |
|
344 |
+ %if mx_stack_1_ %+ %%i |
|
345 |
+ VBROADCASTSS m0, [matrix1q+4*%%i] |
|
346 |
+ mova mx_1_ %+ %%i, m0 |
|
347 |
+ %else |
|
348 |
+ VBROADCASTSS mx_1_ %+ %%i, [matrix1q+4*%%i] |
|
349 |
+ %endif |
|
350 |
+ %endif |
|
351 |
+ %assign %%i %%i+1 |
|
352 |
+%endrep |
|
353 |
+ |
|
354 |
+; load channel pointers to registers as offsets from the first channel pointer |
|
355 |
+%if ARCH_X86_64 |
|
356 |
+ movsxd lenq, r2d |
|
357 |
+%endif |
|
358 |
+ shl lenq, 2-is_s16 |
|
359 |
+%assign %%i 1 |
|
360 |
+%rep (in_channels - 1) |
|
361 |
+ %if ARCH_X86_32 && in_channels >= 7 && %%i >= 5 |
|
362 |
+ mov src5q, [src0q+%%i*gprsize] |
|
363 |
+ add src5q, lenq |
|
364 |
+ mov src %+ %%i %+ m, src5q |
|
365 |
+ %else |
|
366 |
+ mov src %+ %%i %+ q, [src0q+%%i*gprsize] |
|
367 |
+ add src %+ %%i %+ q, lenq |
|
368 |
+ %endif |
|
369 |
+ %assign %%i %%i+1 |
|
370 |
+%endrep |
|
371 |
+ mov src0q, [src0q] |
|
372 |
+ add src0q, lenq |
|
373 |
+ neg lenq |
|
374 |
+.loop |
|
375 |
+; for x86-32 with 7-8 channels we do not have enough gp registers for all src |
|
376 |
+; pointers, so we have to load some of them from the stack each time |
|
377 |
+%define copy_src_from_stack ARCH_X86_32 && in_channels >= 7 && %%i >= 5 |
|
378 |
+%if is_s16 |
|
379 |
+ ; mix with s16p input |
|
380 |
+ mova m0, [src0q+lenq] |
|
381 |
+ S16_TO_S32_SX 0, 1 |
|
382 |
+ cvtdq2ps m0, m0 |
|
383 |
+ cvtdq2ps m1, m1 |
|
384 |
+ %if stereo |
|
385 |
+ mulps m2, m0, mx_1_0 |
|
386 |
+ mulps m3, m1, mx_1_0 |
|
387 |
+ %endif |
|
388 |
+ mulps m0, m0, mx_0_0 |
|
389 |
+ mulps m1, m1, mx_0_0 |
|
390 |
+%assign %%i 1 |
|
391 |
+%rep (in_channels - 1) |
|
392 |
+ %if copy_src_from_stack |
|
393 |
+ %define src_ptr src5q |
|
394 |
+ %else |
|
395 |
+ %define src_ptr src %+ %%i %+ q |
|
396 |
+ %endif |
|
397 |
+ %if stereo |
|
398 |
+ %if copy_src_from_stack |
|
399 |
+ mov src_ptr, src %+ %%i %+ m |
|
400 |
+ %endif |
|
401 |
+ mova m4, [src_ptr+lenq] |
|
402 |
+ S16_TO_S32_SX 4, 5 |
|
403 |
+ cvtdq2ps m4, m4 |
|
404 |
+ cvtdq2ps m5, m5 |
|
405 |
+ fmaddps m2, m4, mx_1_ %+ %%i, m2, m6 |
|
406 |
+ fmaddps m3, m5, mx_1_ %+ %%i, m3, m6 |
|
407 |
+ fmaddps m0, m4, mx_0_ %+ %%i, m0, m4 |
|
408 |
+ fmaddps m1, m5, mx_0_ %+ %%i, m1, m5 |
|
409 |
+ %else |
|
410 |
+ %if copy_src_from_stack |
|
411 |
+ mov src_ptr, src %+ %%i %+ m |
|
412 |
+ %endif |
|
413 |
+ mova m2, [src_ptr+lenq] |
|
414 |
+ S16_TO_S32_SX 2, 3 |
|
415 |
+ cvtdq2ps m2, m2 |
|
416 |
+ cvtdq2ps m3, m3 |
|
417 |
+ fmaddps m0, m2, mx_0_ %+ %%i, m0, m4 |
|
418 |
+ fmaddps m1, m3, mx_0_ %+ %%i, m1, m4 |
|
419 |
+ %endif |
|
420 |
+ %assign %%i %%i+1 |
|
421 |
+%endrep |
|
422 |
+ %if stereo |
|
423 |
+ cvtps2dq m2, m2 |
|
424 |
+ cvtps2dq m3, m3 |
|
425 |
+ packssdw m2, m3 |
|
426 |
+ mova [src1q+lenq], m2 |
|
427 |
+ %endif |
|
428 |
+ cvtps2dq m0, m0 |
|
429 |
+ cvtps2dq m1, m1 |
|
430 |
+ packssdw m0, m1 |
|
431 |
+ mova [src0q+lenq], m0 |
|
432 |
+%else |
|
433 |
+ ; mix with fltp input |
|
434 |
+ %if stereo || mx_stack_0_0 |
|
435 |
+ mova m0, [src0q+lenq] |
|
436 |
+ %endif |
|
437 |
+ %if stereo |
|
438 |
+ mulps m1, m0, mx_1_0 |
|
439 |
+ %endif |
|
440 |
+ %if stereo || mx_stack_0_0 |
|
441 |
+ mulps m0, m0, mx_0_0 |
|
442 |
+ %else |
|
443 |
+ mulps m0, [src0q+lenq], mx_0_0 |
|
444 |
+ %endif |
|
445 |
+%assign %%i 1 |
|
446 |
+%rep (in_channels - 1) |
|
447 |
+ %if copy_src_from_stack |
|
448 |
+ %define src_ptr src5q |
|
449 |
+ mov src_ptr, src %+ %%i %+ m |
|
450 |
+ %else |
|
451 |
+ %define src_ptr src %+ %%i %+ q |
|
452 |
+ %endif |
|
453 |
+ ; avoid extra load for mono if matrix is in a mm register |
|
454 |
+ %if stereo || mx_stack_0_ %+ %%i |
|
455 |
+ mova m2, [src_ptr+lenq] |
|
456 |
+ %endif |
|
457 |
+ %if stereo |
|
458 |
+ fmaddps m1, m2, mx_1_ %+ %%i, m1, m3 |
|
459 |
+ %endif |
|
460 |
+ %if stereo || mx_stack_0_ %+ %%i |
|
461 |
+ fmaddps m0, m2, mx_0_ %+ %%i, m0, m2 |
|
462 |
+ %else |
|
463 |
+ fmaddps m0, mx_0_ %+ %%i, [src_ptr+lenq], m0, m1 |
|
464 |
+ %endif |
|
465 |
+ %assign %%i %%i+1 |
|
466 |
+%endrep |
|
467 |
+ mova [src0q+lenq], m0 |
|
468 |
+ %if stereo |
|
469 |
+ mova [src1q+lenq], m1 |
|
470 |
+ %endif |
|
471 |
+%endif |
|
472 |
+ |
|
473 |
+ add lenq, mmsize |
|
474 |
+ jl .loop |
|
475 |
+; restore stack pointer |
|
476 |
+%if matrix_elements_stack > 0 |
|
477 |
+ %if mmsize == 32 |
|
478 |
+ mov rsp, bkpq |
|
479 |
+ %else |
|
480 |
+ ADD rsp, pad |
|
481 |
+ %endif |
|
482 |
+%endif |
|
483 |
+; zero ymm high halves |
|
484 |
+%if mmsize == 32 |
|
485 |
+ vzeroupper |
|
486 |
+%endif |
|
487 |
+ RET |
|
488 |
+%endmacro |
|
489 |
+ |
|
490 |
+%macro MIX_3_8_TO_1_2_FLT_FUNCS 0 |
|
491 |
+%assign %%i 3 |
|
492 |
+%rep 6 |
|
493 |
+ INIT_XMM sse |
|
494 |
+ MIX_3_8_TO_1_2_FLT %%i, 1, fltp |
|
495 |
+ MIX_3_8_TO_1_2_FLT %%i, 2, fltp |
|
496 |
+ INIT_XMM sse2 |
|
497 |
+ MIX_3_8_TO_1_2_FLT %%i, 1, s16p |
|
498 |
+ MIX_3_8_TO_1_2_FLT %%i, 2, s16p |
|
499 |
+ INIT_XMM sse4 |
|
500 |
+ MIX_3_8_TO_1_2_FLT %%i, 1, s16p |
|
501 |
+ MIX_3_8_TO_1_2_FLT %%i, 2, s16p |
|
502 |
+ ; do not use ymm AVX or FMA4 in x86-32 for 6 or more channels due to stack alignment issues |
|
503 |
+ %if HAVE_AVX |
|
504 |
+ %if ARCH_X86_64 || %%i < 6 |
|
505 |
+ INIT_YMM avx |
|
506 |
+ %else |
|
507 |
+ INIT_XMM avx |
|
508 |
+ %endif |
|
509 |
+ MIX_3_8_TO_1_2_FLT %%i, 1, fltp |
|
510 |
+ MIX_3_8_TO_1_2_FLT %%i, 2, fltp |
|
511 |
+ INIT_XMM avx |
|
512 |
+ MIX_3_8_TO_1_2_FLT %%i, 1, s16p |
|
513 |
+ MIX_3_8_TO_1_2_FLT %%i, 2, s16p |
|
514 |
+ %endif |
|
515 |
+ %if HAVE_FMA4 |
|
516 |
+ %if ARCH_X86_64 || %%i < 6 |
|
517 |
+ INIT_YMM fma4 |
|
518 |
+ %else |
|
519 |
+ INIT_XMM fma4 |
|
520 |
+ %endif |
|
521 |
+ MIX_3_8_TO_1_2_FLT %%i, 1, fltp |
|
522 |
+ MIX_3_8_TO_1_2_FLT %%i, 2, fltp |
|
523 |
+ INIT_XMM fma4 |
|
524 |
+ MIX_3_8_TO_1_2_FLT %%i, 1, s16p |
|
525 |
+ MIX_3_8_TO_1_2_FLT %%i, 2, s16p |
|
526 |
+ %endif |
|
527 |
+ %assign %%i %%i+1 |
|
528 |
+%endrep |
|
529 |
+%endmacro |
|
530 |
+ |
|
531 |
+MIX_3_8_TO_1_2_FLT_FUNCS |
... | ... |
@@ -47,6 +47,129 @@ extern void ff_mix_1_to_2_s16p_flt_sse4(int16_t **src, float **matrix, int len, |
47 | 47 |
extern void ff_mix_1_to_2_s16p_flt_avx (int16_t **src, float **matrix, int len, |
48 | 48 |
int out_ch, int in_ch); |
49 | 49 |
|
50 |
+#define DEFINE_MIX_3_8_TO_1_2(chan) \ |
|
51 |
+extern void ff_mix_ ## chan ## _to_1_fltp_flt_sse(float **src, \ |
|
52 |
+ float **matrix, int len, \ |
|
53 |
+ int out_ch, int in_ch); \ |
|
54 |
+extern void ff_mix_ ## chan ## _to_2_fltp_flt_sse(float **src, \ |
|
55 |
+ float **matrix, int len, \ |
|
56 |
+ int out_ch, int in_ch); \ |
|
57 |
+ \ |
|
58 |
+extern void ff_mix_ ## chan ## _to_1_s16p_flt_sse2(int16_t **src, \ |
|
59 |
+ float **matrix, int len, \ |
|
60 |
+ int out_ch, int in_ch); \ |
|
61 |
+extern void ff_mix_ ## chan ## _to_2_s16p_flt_sse2(int16_t **src, \ |
|
62 |
+ float **matrix, int len, \ |
|
63 |
+ int out_ch, int in_ch); \ |
|
64 |
+ \ |
|
65 |
+extern void ff_mix_ ## chan ## _to_1_s16p_flt_sse4(int16_t **src, \ |
|
66 |
+ float **matrix, int len, \ |
|
67 |
+ int out_ch, int in_ch); \ |
|
68 |
+extern void ff_mix_ ## chan ## _to_2_s16p_flt_sse4(int16_t **src, \ |
|
69 |
+ float **matrix, int len, \ |
|
70 |
+ int out_ch, int in_ch); \ |
|
71 |
+ \ |
|
72 |
+extern void ff_mix_ ## chan ## _to_1_fltp_flt_avx(float **src, \ |
|
73 |
+ float **matrix, int len, \ |
|
74 |
+ int out_ch, int in_ch); \ |
|
75 |
+extern void ff_mix_ ## chan ## _to_2_fltp_flt_avx(float **src, \ |
|
76 |
+ float **matrix, int len, \ |
|
77 |
+ int out_ch, int in_ch); \ |
|
78 |
+ \ |
|
79 |
+extern void ff_mix_ ## chan ## _to_1_s16p_flt_avx(int16_t **src, \ |
|
80 |
+ float **matrix, int len, \ |
|
81 |
+ int out_ch, int in_ch); \ |
|
82 |
+extern void ff_mix_ ## chan ## _to_2_s16p_flt_avx(int16_t **src, \ |
|
83 |
+ float **matrix, int len, \ |
|
84 |
+ int out_ch, int in_ch); \ |
|
85 |
+ \ |
|
86 |
+extern void ff_mix_ ## chan ## _to_1_fltp_flt_fma4(float **src, \ |
|
87 |
+ float **matrix, int len, \ |
|
88 |
+ int out_ch, int in_ch); \ |
|
89 |
+extern void ff_mix_ ## chan ## _to_2_fltp_flt_fma4(float **src, \ |
|
90 |
+ float **matrix, int len, \ |
|
91 |
+ int out_ch, int in_ch); \ |
|
92 |
+ \ |
|
93 |
+extern void ff_mix_ ## chan ## _to_1_s16p_flt_fma4(int16_t **src, \ |
|
94 |
+ float **matrix, int len, \ |
|
95 |
+ int out_ch, int in_ch); \ |
|
96 |
+extern void ff_mix_ ## chan ## _to_2_s16p_flt_fma4(int16_t **src, \ |
|
97 |
+ float **matrix, int len, \ |
|
98 |
+ int out_ch, int in_ch); |
|
99 |
+ |
|
100 |
+DEFINE_MIX_3_8_TO_1_2(3) |
|
101 |
+DEFINE_MIX_3_8_TO_1_2(4) |
|
102 |
+DEFINE_MIX_3_8_TO_1_2(5) |
|
103 |
+DEFINE_MIX_3_8_TO_1_2(6) |
|
104 |
+DEFINE_MIX_3_8_TO_1_2(7) |
|
105 |
+DEFINE_MIX_3_8_TO_1_2(8) |
|
106 |
+ |
|
107 |
+#define SET_MIX_3_8_TO_1_2(chan) \ |
|
108 |
+ if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) { \ |
|
109 |
+ ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\ |
|
110 |
+ chan, 1, 16, 4, "SSE", \ |
|
111 |
+ ff_mix_ ## chan ## _to_1_fltp_flt_sse); \ |
|
112 |
+ ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\ |
|
113 |
+ chan, 2, 16, 4, "SSE", \ |
|
114 |
+ ff_mix_## chan ##_to_2_fltp_flt_sse); \ |
|
115 |
+ } \ |
|
116 |
+ if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) { \ |
|
117 |
+ ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ |
|
118 |
+ chan, 1, 16, 8, "SSE2", \ |
|
119 |
+ ff_mix_ ## chan ## _to_1_s16p_flt_sse2); \ |
|
120 |
+ ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ |
|
121 |
+ chan, 2, 16, 8, "SSE2", \ |
|
122 |
+ ff_mix_ ## chan ## _to_2_s16p_flt_sse2); \ |
|
123 |
+ } \ |
|
124 |
+ if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { \ |
|
125 |
+ ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ |
|
126 |
+ chan, 1, 16, 8, "SSE4", \ |
|
127 |
+ ff_mix_ ## chan ## _to_1_s16p_flt_sse4); \ |
|
128 |
+ ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ |
|
129 |
+ chan, 2, 16, 8, "SSE4", \ |
|
130 |
+ ff_mix_ ## chan ## _to_2_s16p_flt_sse4); \ |
|
131 |
+ } \ |
|
132 |
+ if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) { \ |
|
133 |
+ int ptr_align = 32; \ |
|
134 |
+ int smp_align = 8; \ |
|
135 |
+ if (ARCH_X86_32 || chan >= 6) { \ |
|
136 |
+ ptr_align = 16; \ |
|
137 |
+ smp_align = 4; \ |
|
138 |
+ } \ |
|
139 |
+ ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\ |
|
140 |
+ chan, 1, ptr_align, smp_align, "AVX", \ |
|
141 |
+ ff_mix_ ## chan ## _to_1_fltp_flt_avx); \ |
|
142 |
+ ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\ |
|
143 |
+ chan, 2, ptr_align, smp_align, "AVX", \ |
|
144 |
+ ff_mix_ ## chan ## _to_2_fltp_flt_avx); \ |
|
145 |
+ ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ |
|
146 |
+ chan, 1, 16, 8, "AVX", \ |
|
147 |
+ ff_mix_ ## chan ## _to_1_s16p_flt_avx); \ |
|
148 |
+ ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ |
|
149 |
+ chan, 2, 16, 8, "AVX", \ |
|
150 |
+ ff_mix_ ## chan ## _to_2_s16p_flt_avx); \ |
|
151 |
+ } \ |
|
152 |
+ if (mm_flags & AV_CPU_FLAG_FMA4 && HAVE_FMA4) { \ |
|
153 |
+ int ptr_align = 32; \ |
|
154 |
+ int smp_align = 8; \ |
|
155 |
+ if (ARCH_X86_32 || chan >= 6) { \ |
|
156 |
+ ptr_align = 16; \ |
|
157 |
+ smp_align = 4; \ |
|
158 |
+ } \ |
|
159 |
+ ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\ |
|
160 |
+ chan, 1, ptr_align, smp_align, "FMA4", \ |
|
161 |
+ ff_mix_ ## chan ## _to_1_fltp_flt_fma4); \ |
|
162 |
+ ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\ |
|
163 |
+ chan, 2, ptr_align, smp_align, "FMA4", \ |
|
164 |
+ ff_mix_ ## chan ## _to_2_fltp_flt_fma4); \ |
|
165 |
+ ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ |
|
166 |
+ chan, 1, 16, 8, "FMA4", \ |
|
167 |
+ ff_mix_ ## chan ## _to_1_s16p_flt_fma4); \ |
|
168 |
+ ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ |
|
169 |
+ chan, 2, 16, 8, "FMA4", \ |
|
170 |
+ ff_mix_ ## chan ## _to_2_s16p_flt_fma4); \ |
|
171 |
+ } |
|
172 |
+ |
|
50 | 173 |
av_cold void ff_audio_mix_init_x86(AudioMix *am) |
51 | 174 |
{ |
52 | 175 |
#if HAVE_YASM |
... | ... |
@@ -80,5 +203,12 @@ av_cold void ff_audio_mix_init_x86(AudioMix *am) |
80 | 80 |
ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT, |
81 | 81 |
1, 2, 16, 8, "AVX", ff_mix_1_to_2_s16p_flt_avx); |
82 | 82 |
} |
83 |
+ |
|
84 |
+ SET_MIX_3_8_TO_1_2(3) |
|
85 |
+ SET_MIX_3_8_TO_1_2(4) |
|
86 |
+ SET_MIX_3_8_TO_1_2(5) |
|
87 |
+ SET_MIX_3_8_TO_1_2(6) |
|
88 |
+ SET_MIX_3_8_TO_1_2(7) |
|
89 |
+ SET_MIX_3_8_TO_1_2(8) |
|
83 | 90 |
#endif |
84 | 91 |
} |
... | ... |
@@ -797,11 +797,10 @@ int main(int argc, char **argv) |
797 | 797 |
av_expr_parse_and_eval(&d, *expr, |
798 | 798 |
const_names, const_values, |
799 | 799 |
NULL, NULL, NULL, NULL, NULL, 0, NULL); |
800 |
- if(isnan(d)){ |
|
800 |
+ if (isnan(d)) |
|
801 | 801 |
printf("'%s' -> nan\n\n", *expr); |
802 |
- }else{ |
|
802 |
+ else |
|
803 | 803 |
printf("'%s' -> %f\n\n", *expr, d); |
804 |
- } |
|
805 | 804 |
} |
806 | 805 |
|
807 | 806 |
av_expr_parse_and_eval(&d, "1+(5-2)^(3-1)+1/2+sin(PI)-max(-2.2,-3.1)", |
... | ... |
@@ -42,12 +42,7 @@ ALIGN 16 |
42 | 42 |
|
43 | 43 |
sub lenq, 2*mmsize |
44 | 44 |
jge .loop |
45 |
-%if mmsize == 32 |
|
46 |
- vzeroupper |
|
47 |
- RET |
|
48 |
-%else |
|
49 | 45 |
REP_RET |
50 |
-%endif |
|
51 | 46 |
%endmacro |
52 | 47 |
|
53 | 48 |
INIT_XMM sse |
... | ... |
@@ -88,12 +83,7 @@ cglobal vector_fmac_scalar, 4,4,3, dst, src, mul, len |
88 | 88 |
mova [dstq+lenq+mmsize], m2 |
89 | 89 |
sub lenq, 2*mmsize |
90 | 90 |
jge .loop |
91 |
-%if mmsize == 32 |
|
92 |
- vzeroupper |
|
93 |
- RET |
|
94 |
-%else |
|
95 | 91 |
REP_RET |
96 |
-%endif |
|
97 | 92 |
%endmacro |
98 | 93 |
|
99 | 94 |
INIT_XMM sse |
... | ... |
@@ -392,11 +392,14 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 120 |
392 | 392 |
%macro RET 0 |
393 | 393 |
WIN64_RESTORE_XMM_INTERNAL rsp |
394 | 394 |
POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7 |
395 |
+%if mmsize == 32 |
|
396 |
+ vzeroupper |
|
397 |
+%endif |
|
395 | 398 |
ret |
396 | 399 |
%endmacro |
397 | 400 |
|
398 | 401 |
%macro REP_RET 0 |
399 |
- %if regs_used > 7 || xmm_regs_used > 6 |
|
402 |
+ %if regs_used > 7 || xmm_regs_used > 6 || mmsize == 32 |
|
400 | 403 |
RET |
401 | 404 |
%else |
402 | 405 |
rep ret |
... | ... |
@@ -433,11 +436,14 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 72 |
433 | 433 |
|
434 | 434 |
%macro RET 0 |
435 | 435 |
POP_IF_USED 14, 13, 12, 11, 10, 9 |
436 |
+%if mmsize == 32 |
|
437 |
+ vzeroupper |
|
438 |
+%endif |
|
436 | 439 |
ret |
437 | 440 |
%endmacro |
438 | 441 |
|
439 | 442 |
%macro REP_RET 0 |
440 |
- %if regs_used > 9 |
|
443 |
+ %if regs_used > 9 || mmsize == 32 |
|
441 | 444 |
RET |
442 | 445 |
%else |
443 | 446 |
rep ret |
... | ... |
@@ -479,11 +485,14 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14 |
479 | 479 |
|
480 | 480 |
%macro RET 0 |
481 | 481 |
POP_IF_USED 6, 5, 4, 3 |
482 |
+%if mmsize == 32 |
|
483 |
+ vzeroupper |
|
484 |
+%endif |
|
482 | 485 |
ret |
483 | 486 |
%endmacro |
484 | 487 |
|
485 | 488 |
%macro REP_RET 0 |
486 |
- %if regs_used > 3 |
|
489 |
+ %if regs_used > 3 || mmsize == 32 |
|
487 | 490 |
RET |
488 | 491 |
%else |
489 | 492 |
rep ret |
... | ... |
@@ -1126,16 +1135,22 @@ AVX_INSTR pfmul, 1, 0, 1 |
1126 | 1126 |
%undef j |
1127 | 1127 |
|
1128 | 1128 |
%macro FMA_INSTR 3 |
1129 |
- %macro %1 4-7 %1, %2, %3 |
|
1130 |
- %if cpuflag(xop) |
|
1131 |
- v%5 %1, %2, %3, %4 |
|
1129 |
+ %macro %1 5-8 %1, %2, %3 |
|
1130 |
+ %if cpuflag(xop) || cpuflag(fma4) |
|
1131 |
+ v%6 %1, %2, %3, %4 |
|
1132 | 1132 |
%else |
1133 |
- %6 %1, %2, %3 |
|
1134 |
- %7 %1, %4 |
|
1133 |
+ %ifidn %1, %4 |
|
1134 |
+ %7 %5, %2, %3 |
|
1135 |
+ %8 %1, %4, %5 |
|
1136 |
+ %else |
|
1137 |
+ %7 %1, %2, %3 |
|
1138 |
+ %8 %1, %4 |
|
1139 |
+ %endif |
|
1135 | 1140 |
%endif |
1136 | 1141 |
%endmacro |
1137 | 1142 |
%endmacro |
1138 | 1143 |
|
1144 |
+FMA_INSTR fmaddps, mulps, addps |
|
1139 | 1145 |
FMA_INSTR pmacsdd, pmulld, paddd |
1140 | 1146 |
FMA_INSTR pmacsww, pmullw, paddw |
1141 | 1147 |
FMA_INSTR pmadcswd, pmaddwd, paddd |
... | ... |
@@ -15,9 +15,6 @@ ffservertest: ffserver$(EXESUF) tests/vsynth1/00.pgm tests/data/asynth1.sw |
15 | 15 |
|
16 | 16 |
OBJDIRS += tests/data tests/vsynth1 |
17 | 17 |
|
18 |
-# Required due to missing automatic dependency tracking for HOSTOBJS. |
|
19 |
-tests/rotozoom.o tests/videogen.o: tests/utils.c |
|
20 |
- |
|
21 | 18 |
tests/vsynth1/00.pgm: tests/videogen$(HOSTEXESUF) | tests/vsynth1 |
22 | 19 |
$(M)./$< 'tests/vsynth1/' |
23 | 20 |
|