Original work by Yogender Gupta <ygupta@nvidia.com>
Timo Rothenpieler authored on 2017/05/12 05:53:4117 | 19 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,36 @@ |
0 |
+# Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. |
|
1 |
+# |
|
2 |
+# Permission is hereby granted, free of charge, to any person obtaining a |
|
3 |
+# copy of this software and associated documentation files (the "Software"), |
|
4 |
+# to deal in the Software without restriction, including without limitation |
|
5 |
+# the rights to use, copy, modify, merge, publish, distribute, sublicense, |
|
6 |
+# and/or sell copies of the Software, and to permit persons to whom the |
|
7 |
+# Software is furnished to do so, subject to the following conditions: |
|
8 |
+# |
|
9 |
+# The above copyright notice and this permission notice shall be included in |
|
10 |
+# all copies or substantial portions of the Software. |
|
11 |
+# |
|
12 |
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|
13 |
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|
14 |
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
|
15 |
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|
16 |
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
|
17 |
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
|
18 |
+# DEALINGS IN THE SOFTWARE. |
|
19 |
+ |
|
20 |
# Convert a PTX file into a C source file that defines a string constant.
#
# Usage: ptx2c.sh <output.c> <input.ptx>
#
# The output has the form
#     const char <name>_ptx[] = \
#         "<line 1>\n"
#         "<line 2>\n";
# where <name> is the base name of <input.ptx> without its ".ptx" suffix.
#
# Only POSIX shell features are used, because the build runs this script
# through plain "sh" rather than bash.

set -e

if [ "$#" -lt 2 ]; then
    echo "usage: $0 <output.c> <input.ptx>" >&2
    exit 1
fi

OUT="$1"
IN="$2"

# Fail before touching the output when the input cannot be read; a failing
# sed inside the pipeline below would otherwise go unnoticed.
if [ ! -r "$IN" ]; then
    echo "$0: cannot read '$IN'" >&2
    exit 1
fi

NAME="$(basename "$IN")"
# POSIX suffix removal; the ${NAME/.ptx/} form is a bash extension.
NAME="${NAME%.ptx}"

{
    printf 'const char %s_ptx[] = \\' "$NAME"

    # Escape backslashes and double quotes so every input line is a valid C
    # string literal.  "IFS= read -r" keeps whitespace and backslashes
    # intact, and the "|| [ -n ... ]" clause keeps a last line that lacks a
    # trailing newline.
    sed -e 's/[\\"]/\\&/g' "$IN" |
    while IFS= read -r LINE || [ -n "$LINE" ]
    do
        printf '\n\t"%s\\n"' "$LINE"
    done

    printf ';\n'
} > "$OUT"

exit 0
|
35 |
+ |
... | ... |
@@ -338,6 +338,7 @@ Toolchain options: |
338 | 338 |
--cxx=CXX use C compiler CXX [$cxx_default] |
339 | 339 |
--objcc=OCC use ObjC compiler OCC [$cc_default] |
340 | 340 |
--dep-cc=DEPCC use dependency generator DEPCC [$cc_default] |
341 |
+ --nvcc=NVCC use Nvidia CUDA compiler NVCC [$nvcc_default] |
|
341 | 342 |
--ld=LD use linker LD [$ld_default] |
342 | 343 |
--pkg-config=PKGCONFIG use pkg-config tool PKGCONFIG [$pkg_config_default] |
343 | 344 |
--pkg-config-flags=FLAGS pass additional flags to pkgconf [] |
... | ... |
@@ -359,6 +360,7 @@ Toolchain options: |
359 | 359 |
--extra-libs=ELIBS add ELIBS [$ELIBS] |
360 | 360 |
--extra-version=STRING version string suffix [] |
361 | 361 |
--optflags=OPTFLAGS override optimization-related compiler flags |
362 |
+ --nvccflags=NVCCFLAGS override nvcc flags [$nvccflags_default] |
|
362 | 363 |
--build-suffix=SUFFIX library name suffix [] |
363 | 364 |
--enable-pic build position-independent code |
364 | 365 |
--enable-thumb compile for Thumb instruction set |
... | ... |
@@ -2221,6 +2223,7 @@ CMDLINE_SET=" |
2221 | 2221 |
malloc_prefix |
2222 | 2222 |
nm |
2223 | 2223 |
optflags |
2224 |
+ nvccflags |
|
2224 | 2225 |
pkg_config |
2225 | 2226 |
pkg_config_flags |
2226 | 2227 |
progs_suffix |
... | ... |
@@ -2719,6 +2722,7 @@ vaapi_encode_deps="vaapi" |
2719 | 2719 |
|
2720 | 2720 |
hwupload_cuda_filter_deps="cuda" |
2721 | 2721 |
scale_npp_filter_deps="cuda_sdk libnpp" |
2722 |
+scale_cuda_filter_deps="cuda_sdk" |
|
2722 | 2723 |
|
2723 | 2724 |
nvenc_deps="cuda" |
2724 | 2725 |
nvenc_deps_any="dlopen LoadLibrary" |
... | ... |
@@ -3261,6 +3265,8 @@ strip_default="strip" |
3261 | 3261 |
version_script='--version-script' |
3262 | 3262 |
yasmexe_default="yasm" |
3263 | 3263 |
windres_default="windres" |
3264 |
+nvcc_default="nvcc" |
|
3265 |
+nvccflags_default="-gencode arch=compute_30,code=sm_30 -O2" |
|
3264 | 3266 |
|
3265 | 3267 |
# OS |
3266 | 3268 |
target_os_default=$(tolower $(uname -s)) |
... | ... |
@@ -3334,6 +3340,8 @@ HOSTCC_C='-c' |
3334 | 3334 |
HOSTCC_E='-E -o $@' |
3335 | 3335 |
HOSTCC_O='-o $@' |
3336 | 3336 |
HOSTLD_O='-o $@' |
3337 |
+NVCC_C='-c' |
|
3338 |
+NVCC_O='-o $@' |
|
3337 | 3339 |
|
3338 | 3340 |
host_extralibs='-lm' |
3339 | 3341 |
host_cflags_filter=echo |
... | ... |
@@ -3721,7 +3729,7 @@ windres_default="${cross_prefix}${windres_default}" |
3721 | 3721 |
sysinclude_default="${sysroot}/usr/include" |
3722 | 3722 |
|
3723 | 3723 |
set_default arch cc cxx doxygen pkg_config ranlib strip sysinclude \ |
3724 |
- target_exec target_os yasmexe |
|
3724 |
+ target_exec target_os yasmexe nvcc |
|
3725 | 3725 |
enabled cross_compile || host_cc_default=$cc |
3726 | 3726 |
set_default host_cc |
3727 | 3727 |
|
... | ... |
@@ -6241,6 +6249,16 @@ if [ -z "$optflags" ]; then |
6241 | 6241 |
fi |
6242 | 6242 |
fi |
6243 | 6243 |
|
6244 |
# Fall back to the default nvcc flags when none have been set.
test -n "$nvccflags" || nvccflags=$nvccflags_default

# Append -m64 for the architectures listed below and -m32 for every other
# one; this applies to user-supplied flags as well as the defaults.
if ! { enabled x86_64 || enabled ppc64 || enabled aarch64; }; then
    nvccflags="$nvccflags -m32"
else
    nvccflags="$nvccflags -m64"
fi
|
6253 |
+ |
|
6244 | 6254 |
check_optflags(){ |
6245 | 6255 |
check_cflags "$@" |
6246 | 6256 |
enabled lto && check_ldflags "$@" |
... | ... |
@@ -6704,6 +6722,7 @@ ARFLAGS=$arflags |
6704 | 6704 |
AR_O=$ar_o |
6705 | 6705 |
RANLIB=$ranlib |
6706 | 6706 |
STRIP=$strip |
6707 |
+NVCC=$nvcc |
|
6707 | 6708 |
CP=cp -p |
6708 | 6709 |
LN_S=$ln_s |
6709 | 6710 |
CPPFLAGS=$CPPFLAGS |
... | ... |
@@ -6711,6 +6730,7 @@ CFLAGS=$CFLAGS |
6711 | 6711 |
CXXFLAGS=$CXXFLAGS |
6712 | 6712 |
OBJCFLAGS=$OBJCFLAGS |
6713 | 6713 |
ASFLAGS=$ASFLAGS |
6714 |
+NVCCFLAGS=$nvccflags |
|
6714 | 6715 |
AS_C=$AS_C |
6715 | 6716 |
AS_O=$AS_O |
6716 | 6717 |
OBJCC_C=$OBJCC_C |
... | ... |
@@ -6721,6 +6741,8 @@ CC_E=$CC_E |
6721 | 6721 |
CC_O=$CC_O |
6722 | 6722 |
CXX_C=$CXX_C |
6723 | 6723 |
CXX_O=$CXX_O |
6724 |
+NVCC_C=$NVCC_C |
|
6725 |
+NVCC_O=$NVCC_O |
|
6724 | 6726 |
LD_O=$LD_O |
6725 | 6727 |
LD_LIB=$LD_LIB |
6726 | 6728 |
LD_PATH=$LD_PATH |
... | ... |
@@ -15,7 +15,7 @@ ifndef SUBDIR |
15 | 15 |
ifndef V |
16 | 16 |
Q = @ |
17 | 17 |
ECHO = printf "$(1)\t%s\n" $(2) |
18 |
-BRIEF = CC CXX OBJCC HOSTCC HOSTLD AS YASM AR LD STRIP CP WINDRES |
|
18 |
+BRIEF = CC CXX OBJCC HOSTCC HOSTLD AS YASM AR LD STRIP CP WINDRES NVCC |
|
19 | 19 |
SILENT = DEPCC DEPHOSTCC DEPAS DEPYASM RANLIB RM |
20 | 20 |
|
21 | 21 |
MSG = $@ |
... | ... |
@@ -38,6 +38,7 @@ OBJCCFLAGS = $(CPPFLAGS) $(CFLAGS) $(OBJCFLAGS) |
38 | 38 |
ASFLAGS := $(CPPFLAGS) $(ASFLAGS) |
39 | 39 |
CXXFLAGS := $(CPPFLAGS) $(CFLAGS) $(CXXFLAGS) |
40 | 40 |
YASMFLAGS += $(IFLAGS:%=%/) -Pconfig.asm |
41 |
+NVCCFLAGS += -ptx |
|
41 | 42 |
|
42 | 43 |
HOSTCCFLAGS = $(IFLAGS) $(HOSTCPPFLAGS) $(HOSTCFLAGS) |
43 | 44 |
LDFLAGS := $(ALLFFLIBS:%=$(LD_PATH)lib%) $(LDFLAGS) |
... | ... |
@@ -52,6 +53,7 @@ COMPILE_CXX = $(call COMPILE,CXX) |
52 | 52 |
COMPILE_S = $(call COMPILE,AS) |
53 | 53 |
COMPILE_M = $(call COMPILE,OBJCC) |
54 | 54 |
COMPILE_HOSTC = $(call COMPILE,HOSTCC) |
55 |
+COMPILE_NVCC = $(call COMPILE,NVCC) |
|
55 | 56 |
|
56 | 57 |
%.o: %.c |
57 | 58 |
$(COMPILE_C) |
... | ... |
@@ -89,6 +91,12 @@ COMPILE_HOSTC = $(call COMPILE,HOSTCC) |
89 | 89 |
%.h.c: |
90 | 90 |
$(Q)echo '#include "$*.h"' >$@ |
91 | 91 |
|
92 |
# Compile a CUDA source file to PTX with the NVCC compile command.
%.ptx: %.cu
	$(COMPILE_NVCC)

# Wrap a PTX file into a C source file with ptx2c.sh (output file first,
# input file second).  The input path is passed with a leading $(SRC_PATH)/
# replaced by $(SRC_LINK)/.
%.ptx.c: %.ptx
	$(Q)sh $(SRC_PATH)/compat/cuda/ptx2c.sh $@ $(<:$(SRC_PATH)/%=$(SRC_LINK)/%)
|
97 |
+ |
|
92 | 98 |
%.c %.h %.pc %.ver %.version: TAG = GEN |
93 | 99 |
|
94 | 100 |
# Dummy rule to stop make trying to rebuild removed or renamed headers |
... | ... |
@@ -133,9 +141,10 @@ ALLHEADERS := $(subst $(SRC_DIR)/,$(SUBDIR),$(wildcard $(SRC_DIR)/*.h $(SRC_DIR) |
133 | 133 |
SKIPHEADERS += $(ARCH_HEADERS:%=$(ARCH)/%) $(SKIPHEADERS-) |
134 | 134 |
SKIPHEADERS := $(SKIPHEADERS:%=$(SUBDIR)%) |
135 | 135 |
HOBJS = $(filter-out $(SKIPHEADERS:.h=.h.o),$(ALLHEADERS:.h=.h.o)) |
136 |
+PTXOBJS = $(filter %.ptx.o,$(OBJS)) |
|
136 | 137 |
$(HOBJS): CCFLAGS += $(CFLAGS_HEADERS) |
137 | 138 |
checkheaders: $(HOBJS) |
138 |
-.SECONDARY: $(HOBJS:.o=.c) |
|
139 |
+.SECONDARY: $(HOBJS:.o=.c) $(PTXOBJS:.o=.c) $(PTXOBJS:.o=) |
|
139 | 140 |
|
140 | 141 |
alltools: $(TOOLS) |
141 | 142 |
|
... | ... |
@@ -154,7 +163,7 @@ $(TOOLOBJS): | tools |
154 | 154 |
|
155 | 155 |
OBJDIRS := $(OBJDIRS) $(dir $(OBJS) $(HOBJS) $(HOSTOBJS) $(SLIBOBJS) $(TESTOBJS)) |
156 | 156 |
|
157 |
-CLEANSUFFIXES = *.d *.o *~ *.h.c *.gcda *.gcno *.map *.ver *.version *.ho *$(DEFAULT_YASMD).asm |
|
157 |
+CLEANSUFFIXES = *.d *.o *~ *.h.c *.gcda *.gcno *.map *.ver *.version *.ho *$(DEFAULT_YASMD).asm *.ptx *.ptx.c |
|
158 | 158 |
DISTCLEANSUFFIXES = *.pc |
159 | 159 |
LIBSUFFIXES = *.a *.lib *.so *.so.* *.dylib *.dll *.def *.dll.a |
160 | 160 |
|