Browse code

build: add support for building .cu files via nvcc

Original work by Yogender Gupta <ygupta@nvidia.com>

Timo Rothenpieler authored on 2017/05/12 05:53:41
Showing 5 changed files
... ...
@@ -19,6 +19,8 @@
19 19
 *.swp
20 20
 *.ver
21 21
 *.version
22
+*.ptx
23
+*.ptx.c
22 24
 *_g
23 25
 \#*
24 26
 .\#*
... ...
@@ -11,6 +11,8 @@ vpath %.asm  $(SRC_PATH)
11 11
 vpath %.rc   $(SRC_PATH)
12 12
 vpath %.v    $(SRC_PATH)
13 13
 vpath %.texi $(SRC_PATH)
14
+vpath %.cu   $(SRC_PATH)
15
+vpath %.ptx  $(SRC_PATH)
14 16
 vpath %/fate_config.sh.template $(SRC_PATH)
15 17
 
16 18
 AVPROGS-$(CONFIG_FFMPEG)   += ffmpeg
17 19
new file mode 100644
... ...
@@ -0,0 +1,36 @@
0
+# Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
1
+#
2
+# Permission is hereby granted, free of charge, to any person obtaining a
3
+# copy of this software and associated documentation files (the "Software"),
4
+# to deal in the Software without restriction, including without limitation
5
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
6
+# and/or sell copies of the Software, and to permit persons to whom the
7
+# Software is furnished to do so, subject to the following conditions:
8
+#
9
+# The above copyright notice and this permission notice shall be included in
10
+# all copies or substantial portions of the Software.
11
+#
12
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
15
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
17
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
18
+# DEALINGS IN THE SOFTWARE.
19
+
20
set -e

# Usage: ptx2c.sh OUT IN
#
# Converts the PTX text file IN into a C source file OUT that defines
#     const char <name>_ptx[] = "...";
# where <name> is the basename of IN with a trailing ".ptx" removed and each
# input line becomes one string-literal fragment terminated by "\n".
#
# The script is invoked through plain "sh", so only POSIX shell and utility
# features are used (no bash substitutions, no "echo -n"/"echo -e").
if [ "$#" -ne 2 ]; then
    echo "usage: $0 <output.c> <input.ptx>" >&2
    exit 1
fi

OUT="$1"
IN="$2"
NAME="$(basename "$IN" .ptx)"

# Literal TAB and CR characters for use inside sed expressions; "\t" and "\r"
# are not portable sed escapes.
TAB="$(printf '\t')"
CR="$(printf '\r')"

# Build the result in a temporary file and rename it at the end, so that a
# failure never leaves a truncated OUT that looks up to date to make.
TMP="$OUT.tmp.$$"
trap 'rm -f "$TMP"' EXIT

{
    printf 'const char %s_ptx[] = \\\n' "$NAME"
    # Per input line: drop a trailing CR, escape backslashes and double
    # quotes so the text is a valid C string literal, then wrap the line
    # as <TAB>"...\n".
    sed -e "s/${CR}\$//" \
        -e 's/["\\]/\\&/g' \
        -e "s/^/${TAB}\"/" \
        -e 's/$/\\n"/' "$IN"
    echo ";"
} > "$TMP"

mv -f "$TMP" "$OUT"
35
+
... ...
@@ -338,6 +338,7 @@ Toolchain options:
338 338
   --cxx=CXX                use C compiler CXX [$cxx_default]
339 339
   --objcc=OCC              use ObjC compiler OCC [$cc_default]
340 340
   --dep-cc=DEPCC           use dependency generator DEPCC [$cc_default]
341
+  --nvcc=NVCC              use Nvidia CUDA compiler NVCC [$nvcc_default]
341 342
   --ld=LD                  use linker LD [$ld_default]
342 343
   --pkg-config=PKGCONFIG   use pkg-config tool PKGCONFIG [$pkg_config_default]
343 344
   --pkg-config-flags=FLAGS pass additional flags to pkgconf []
... ...
@@ -359,6 +360,7 @@ Toolchain options:
359 359
   --extra-libs=ELIBS       add ELIBS [$ELIBS]
360 360
   --extra-version=STRING   version string suffix []
361 361
   --optflags=OPTFLAGS      override optimization-related compiler flags
362
+  --nvccflags=NVCCFLAGS    override nvcc flags [$nvccflags_default]
362 363
   --build-suffix=SUFFIX    library name suffix []
363 364
   --enable-pic             build position-independent code
364 365
   --enable-thumb           compile for Thumb instruction set
... ...
@@ -2221,6 +2223,7 @@ CMDLINE_SET="
2221 2221
     malloc_prefix
2222 2222
     nm
2223 2223
     optflags
2224
+    nvccflags
2224 2225
     pkg_config
2225 2226
     pkg_config_flags
2226 2227
     progs_suffix
... ...
@@ -2719,6 +2722,7 @@ vaapi_encode_deps="vaapi"
2719 2719
 
2720 2720
 hwupload_cuda_filter_deps="cuda"
2721 2721
 scale_npp_filter_deps="cuda_sdk libnpp"
2722
+scale_cuda_filter_deps="cuda_sdk"
2722 2723
 
2723 2724
 nvenc_deps="cuda"
2724 2725
 nvenc_deps_any="dlopen LoadLibrary"
... ...
@@ -3261,6 +3265,8 @@ strip_default="strip"
3261 3261
 version_script='--version-script'
3262 3262
 yasmexe_default="yasm"
3263 3263
 windres_default="windres"
3264
+nvcc_default="nvcc"
3265
+nvccflags_default="-gencode arch=compute_30,code=sm_30 -O2"
3264 3266
 
3265 3267
 # OS
3266 3268
 target_os_default=$(tolower $(uname -s))
... ...
@@ -3334,6 +3340,8 @@ HOSTCC_C='-c'
3334 3334
 HOSTCC_E='-E -o $@'
3335 3335
 HOSTCC_O='-o $@'
3336 3336
 HOSTLD_O='-o $@'
3337
+NVCC_C='-c'
3338
+NVCC_O='-o $@'
3337 3339
 
3338 3340
 host_extralibs='-lm'
3339 3341
 host_cflags_filter=echo
... ...
@@ -3721,7 +3729,7 @@ windres_default="${cross_prefix}${windres_default}"
3721 3721
 sysinclude_default="${sysroot}/usr/include"
3722 3722
 
3723 3723
 set_default arch cc cxx doxygen pkg_config ranlib strip sysinclude \
3724
-    target_exec target_os yasmexe
3724
+    target_exec target_os yasmexe nvcc
3725 3725
 enabled cross_compile || host_cc_default=$cc
3726 3726
 set_default host_cc
3727 3727
 
... ...
@@ -6241,6 +6249,16 @@ if [ -z "$optflags" ]; then
6241 6241
     fi
6242 6242
 fi
6243 6243
 
6244
# Fall back to the built-in default when --nvccflags left the value empty.
nvccflags="${nvccflags:-$nvccflags_default}"

# Append nvcc's word-size switch: -m64 for the 64-bit architectures listed
# here, -m32 for every other target.
if enabled x86_64 || enabled ppc64 || enabled aarch64; then
    nvccflags="$nvccflags -m64"
else
    nvccflags="$nvccflags -m32"
fi
6253
+
6244 6254
 check_optflags(){
6245 6255
     check_cflags "$@"
6246 6256
     enabled lto && check_ldflags "$@"
... ...
@@ -6704,6 +6722,7 @@ ARFLAGS=$arflags
6704 6704
 AR_O=$ar_o
6705 6705
 RANLIB=$ranlib
6706 6706
 STRIP=$strip
6707
+NVCC=$nvcc
6707 6708
 CP=cp -p
6708 6709
 LN_S=$ln_s
6709 6710
 CPPFLAGS=$CPPFLAGS
... ...
@@ -6711,6 +6730,7 @@ CFLAGS=$CFLAGS
6711 6711
 CXXFLAGS=$CXXFLAGS
6712 6712
 OBJCFLAGS=$OBJCFLAGS
6713 6713
 ASFLAGS=$ASFLAGS
6714
+NVCCFLAGS=$nvccflags
6714 6715
 AS_C=$AS_C
6715 6716
 AS_O=$AS_O
6716 6717
 OBJCC_C=$OBJCC_C
... ...
@@ -6721,6 +6741,8 @@ CC_E=$CC_E
6721 6721
 CC_O=$CC_O
6722 6722
 CXX_C=$CXX_C
6723 6723
 CXX_O=$CXX_O
6724
+NVCC_C=$NVCC_C
6725
+NVCC_O=$NVCC_O
6724 6726
 LD_O=$LD_O
6725 6727
 LD_LIB=$LD_LIB
6726 6728
 LD_PATH=$LD_PATH
... ...
@@ -15,7 +15,7 @@ ifndef SUBDIR
15 15
 ifndef V
16 16
 Q      = @
17 17
 ECHO   = printf "$(1)\t%s\n" $(2)
18
-BRIEF  = CC CXX OBJCC HOSTCC HOSTLD AS YASM AR LD STRIP CP WINDRES
18
+BRIEF  = CC CXX OBJCC HOSTCC HOSTLD AS YASM AR LD STRIP CP WINDRES NVCC
19 19
 SILENT = DEPCC DEPHOSTCC DEPAS DEPYASM RANLIB RM
20 20
 
21 21
 MSG    = $@
... ...
@@ -38,6 +38,7 @@ OBJCCFLAGS  = $(CPPFLAGS) $(CFLAGS) $(OBJCFLAGS)
38 38
 ASFLAGS    := $(CPPFLAGS) $(ASFLAGS)
39 39
 CXXFLAGS   := $(CPPFLAGS) $(CFLAGS) $(CXXFLAGS)
40 40
 YASMFLAGS  += $(IFLAGS:%=%/) -Pconfig.asm
41
+NVCCFLAGS  += -ptx
41 42
 
42 43
 HOSTCCFLAGS = $(IFLAGS) $(HOSTCPPFLAGS) $(HOSTCFLAGS)
43 44
 LDFLAGS    := $(ALLFFLIBS:%=$(LD_PATH)lib%) $(LDFLAGS)
... ...
@@ -52,6 +53,7 @@ COMPILE_CXX = $(call COMPILE,CXX)
52 52
 COMPILE_S = $(call COMPILE,AS)
53 53
 COMPILE_M = $(call COMPILE,OBJCC)
54 54
 COMPILE_HOSTC = $(call COMPILE,HOSTCC)
55
+COMPILE_NVCC = $(call COMPILE,NVCC)
55 56
 
56 57
 %.o: %.c
57 58
 	$(COMPILE_C)
... ...
@@ -89,6 +91,12 @@ COMPILE_HOSTC = $(call COMPILE,HOSTCC)
89 89
 %.h.c:
90 90
 	$(Q)echo '#include "$*.h"' >$@
91 91
 
92
# Run the CUDA source through the generic COMPILE template with the NVCC
# tool variables; NVCCFLAGS carries -ptx, so the output is PTX text.
%.ptx: %.cu
	$(call COMPILE,NVCC)

# Wrap the PTX text in a C source file via ptx2c.sh. The prerequisite path is
# rewritten from $(SRC_PATH)/... to $(SRC_LINK)/... before being passed on.
%.ptx.c: %.ptx
	$(Q)sh $(SRC_PATH)/compat/cuda/ptx2c.sh $@ \
		$(patsubst $(SRC_PATH)/%,$(SRC_LINK)/%,$<)
97
+
92 98
 %.c %.h %.pc %.ver %.version: TAG = GEN
93 99
 
94 100
 # Dummy rule to stop make trying to rebuild removed or renamed headers
... ...
@@ -133,9 +141,10 @@ ALLHEADERS := $(subst $(SRC_DIR)/,$(SUBDIR),$(wildcard $(SRC_DIR)/*.h $(SRC_DIR)
133 133
 SKIPHEADERS += $(ARCH_HEADERS:%=$(ARCH)/%) $(SKIPHEADERS-)
134 134
 SKIPHEADERS := $(SKIPHEADERS:%=$(SUBDIR)%)
135 135
 HOBJS        = $(filter-out $(SKIPHEADERS:.h=.h.o),$(ALLHEADERS:.h=.h.o))
136
+PTXOBJS      = $(filter %.ptx.o,$(OBJS))
136 137
 $(HOBJS):     CCFLAGS += $(CFLAGS_HEADERS)
137 138
 checkheaders: $(HOBJS)
138
-.SECONDARY:   $(HOBJS:.o=.c)
139
+.SECONDARY:   $(HOBJS:.o=.c) $(PTXOBJS:.o=.c) $(PTXOBJS:.o=)
139 140
 
140 141
 alltools: $(TOOLS)
141 142
 
... ...
@@ -154,7 +163,7 @@ $(TOOLOBJS): | tools
154 154
 
155 155
 OBJDIRS := $(OBJDIRS) $(dir $(OBJS) $(HOBJS) $(HOSTOBJS) $(SLIBOBJS) $(TESTOBJS))
156 156
 
157
-CLEANSUFFIXES     = *.d *.o *~ *.h.c *.gcda *.gcno *.map *.ver *.version *.ho *$(DEFAULT_YASMD).asm
157
+CLEANSUFFIXES     = *.d *.o *~ *.h.c *.gcda *.gcno *.map *.ver *.version *.ho *$(DEFAULT_YASMD).asm *.ptx *.ptx.c
158 158
 DISTCLEANSUFFIXES = *.pc
159 159
 LIBSUFFIXES       = *.a *.lib *.so *.so.* *.dylib *.dll *.def *.dll.a
160 160