Commit f1ab71b0 authored by Timo Rothenpieler

build: add support for building .cu files via nvcc

Original work by Yogender Gupta <ygupta@nvidia.com>
parent 6a374057
...@@ -19,6 +19,8 @@ ...@@ -19,6 +19,8 @@
*.swp *.swp
*.ver *.ver
*.version *.version
*.ptx
*.ptx.c
*_g *_g
\#* \#*
.\#* .\#*
......
...@@ -11,6 +11,8 @@ vpath %.asm $(SRC_PATH) ...@@ -11,6 +11,8 @@ vpath %.asm $(SRC_PATH)
vpath %.rc $(SRC_PATH) vpath %.rc $(SRC_PATH)
vpath %.v $(SRC_PATH) vpath %.v $(SRC_PATH)
vpath %.texi $(SRC_PATH) vpath %.texi $(SRC_PATH)
vpath %.cu $(SRC_PATH)
vpath %.ptx $(SRC_PATH)
vpath %/fate_config.sh.template $(SRC_PATH) vpath %/fate_config.sh.template $(SRC_PATH)
AVPROGS-$(CONFIG_FFMPEG) += ffmpeg AVPROGS-$(CONFIG_FFMPEG) += ffmpeg
......
# Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
# Convert a PTX file into a C source file that defines a string constant.
#
# Usage: ptx2c.sh OUT IN
#   OUT  path of the C file to write
#   IN   path of the PTX file to embed
#
# The generated file defines `const char <name>_ptx[]`, where <name> is the
# base name of IN without its .ptx suffix. The array holds the PTX text as
# one "\n"-terminated string literal per input line.
#
# Only POSIX sh features are used (printf instead of `echo -n`/`echo -e`,
# no ${var/pattern/replacement}) because the build runs this script through
# `sh`, which is not necessarily bash.
set -e

if [ "$#" -lt 2 ]; then
    echo "usage: $0 OUT IN" >&2
    exit 1
fi

OUT="$1"
IN="$2"

# Fail early on a missing input: inside the pipeline below a sed failure
# would not be caught by `set -e`.
if [ ! -r "$IN" ]; then
    echo "$0: cannot read '$IN'" >&2
    exit 1
fi

NAME="$(basename "$IN" .ptx)"

{
    printf 'const char %s_ptx[] = \\' "$NAME"
    if [ -s "$IN" ]; then
        # Escape backslashes and double quotes so every PTX line is a valid
        # C string literal. `IFS= read -r` keeps whitespace and backslashes
        # intact; the `|| [ -n "$LINE" ]` part keeps a final line that has
        # no trailing newline.
        sed -e 's/\\/\\\\/g' -e 's/"/\\"/g' "$IN" |
        while IFS= read -r LINE || [ -n "$LINE" ]
        do
            printf '\n\t"%s\\n"' "$LINE"
        done
    else
        # An empty input still has to produce a valid initializer.
        printf '\n\t""'
    fi
    printf ';\n'
} > "$OUT"

exit 0
...@@ -338,6 +338,7 @@ Toolchain options: ...@@ -338,6 +338,7 @@ Toolchain options:
--cxx=CXX use C compiler CXX [$cxx_default] --cxx=CXX use C compiler CXX [$cxx_default]
--objcc=OCC use ObjC compiler OCC [$cc_default] --objcc=OCC use ObjC compiler OCC [$cc_default]
--dep-cc=DEPCC use dependency generator DEPCC [$cc_default] --dep-cc=DEPCC use dependency generator DEPCC [$cc_default]
--nvcc=NVCC use Nvidia CUDA compiler NVCC [$nvcc_default]
--ld=LD use linker LD [$ld_default] --ld=LD use linker LD [$ld_default]
--pkg-config=PKGCONFIG use pkg-config tool PKGCONFIG [$pkg_config_default] --pkg-config=PKGCONFIG use pkg-config tool PKGCONFIG [$pkg_config_default]
--pkg-config-flags=FLAGS pass additional flags to pkgconf [] --pkg-config-flags=FLAGS pass additional flags to pkgconf []
...@@ -359,6 +360,7 @@ Toolchain options: ...@@ -359,6 +360,7 @@ Toolchain options:
--extra-libs=ELIBS add ELIBS [$ELIBS] --extra-libs=ELIBS add ELIBS [$ELIBS]
--extra-version=STRING version string suffix [] --extra-version=STRING version string suffix []
--optflags=OPTFLAGS override optimization-related compiler flags --optflags=OPTFLAGS override optimization-related compiler flags
--nvccflags=NVCCFLAGS override nvcc flags [$nvccflags_default]
--build-suffix=SUFFIX library name suffix [] --build-suffix=SUFFIX library name suffix []
--enable-pic build position-independent code --enable-pic build position-independent code
--enable-thumb compile for Thumb instruction set --enable-thumb compile for Thumb instruction set
...@@ -2221,6 +2223,7 @@ CMDLINE_SET=" ...@@ -2221,6 +2223,7 @@ CMDLINE_SET="
malloc_prefix malloc_prefix
nm nm
optflags optflags
nvccflags
pkg_config pkg_config
pkg_config_flags pkg_config_flags
progs_suffix progs_suffix
...@@ -2719,6 +2722,7 @@ vaapi_encode_deps="vaapi" ...@@ -2719,6 +2722,7 @@ vaapi_encode_deps="vaapi"
hwupload_cuda_filter_deps="cuda" hwupload_cuda_filter_deps="cuda"
scale_npp_filter_deps="cuda_sdk libnpp" scale_npp_filter_deps="cuda_sdk libnpp"
scale_cuda_filter_deps="cuda_sdk"
nvenc_deps="cuda" nvenc_deps="cuda"
nvenc_deps_any="dlopen LoadLibrary" nvenc_deps_any="dlopen LoadLibrary"
...@@ -3261,6 +3265,8 @@ strip_default="strip" ...@@ -3261,6 +3265,8 @@ strip_default="strip"
version_script='--version-script' version_script='--version-script'
yasmexe_default="yasm" yasmexe_default="yasm"
windres_default="windres" windres_default="windres"
nvcc_default="nvcc"
nvccflags_default="-gencode arch=compute_30,code=sm_30 -O2"
# OS # OS
target_os_default=$(tolower $(uname -s)) target_os_default=$(tolower $(uname -s))
...@@ -3334,6 +3340,8 @@ HOSTCC_C='-c' ...@@ -3334,6 +3340,8 @@ HOSTCC_C='-c'
HOSTCC_E='-E -o $@' HOSTCC_E='-E -o $@'
HOSTCC_O='-o $@' HOSTCC_O='-o $@'
HOSTLD_O='-o $@' HOSTLD_O='-o $@'
NVCC_C='-c'
NVCC_O='-o $@'
host_extralibs='-lm' host_extralibs='-lm'
host_cflags_filter=echo host_cflags_filter=echo
...@@ -3721,7 +3729,7 @@ windres_default="${cross_prefix}${windres_default}" ...@@ -3721,7 +3729,7 @@ windres_default="${cross_prefix}${windres_default}"
sysinclude_default="${sysroot}/usr/include" sysinclude_default="${sysroot}/usr/include"
set_default arch cc cxx doxygen pkg_config ranlib strip sysinclude \ set_default arch cc cxx doxygen pkg_config ranlib strip sysinclude \
target_exec target_os yasmexe target_exec target_os yasmexe nvcc
enabled cross_compile || host_cc_default=$cc enabled cross_compile || host_cc_default=$cc
set_default host_cc set_default host_cc
...@@ -6241,6 +6249,16 @@ if [ -z "$optflags" ]; then ...@@ -6241,6 +6249,16 @@ if [ -z "$optflags" ]; then
fi fi
fi fi
# Use the default nvcc flags when nvccflags is empty.
if [ -z "$nvccflags" ]; then
nvccflags=$nvccflags_default
fi
# Always append a word-size flag, including to user-supplied flags:
# -m64 when x86_64, ppc64 or aarch64 is enabled, -m32 for everything else.
# NOTE(review): other 64-bit targets also end up with -m32 here; confirm
# that this is intended.
if enabled x86_64 || enabled ppc64 || enabled aarch64; then
nvccflags="$nvccflags -m64"
else
nvccflags="$nvccflags -m32"
fi
check_optflags(){ check_optflags(){
check_cflags "$@" check_cflags "$@"
enabled lto && check_ldflags "$@" enabled lto && check_ldflags "$@"
...@@ -6704,6 +6722,7 @@ ARFLAGS=$arflags ...@@ -6704,6 +6722,7 @@ ARFLAGS=$arflags
AR_O=$ar_o AR_O=$ar_o
RANLIB=$ranlib RANLIB=$ranlib
STRIP=$strip STRIP=$strip
NVCC=$nvcc
CP=cp -p CP=cp -p
LN_S=$ln_s LN_S=$ln_s
CPPFLAGS=$CPPFLAGS CPPFLAGS=$CPPFLAGS
...@@ -6711,6 +6730,7 @@ CFLAGS=$CFLAGS ...@@ -6711,6 +6730,7 @@ CFLAGS=$CFLAGS
CXXFLAGS=$CXXFLAGS CXXFLAGS=$CXXFLAGS
OBJCFLAGS=$OBJCFLAGS OBJCFLAGS=$OBJCFLAGS
ASFLAGS=$ASFLAGS ASFLAGS=$ASFLAGS
NVCCFLAGS=$nvccflags
AS_C=$AS_C AS_C=$AS_C
AS_O=$AS_O AS_O=$AS_O
OBJCC_C=$OBJCC_C OBJCC_C=$OBJCC_C
...@@ -6721,6 +6741,8 @@ CC_E=$CC_E ...@@ -6721,6 +6741,8 @@ CC_E=$CC_E
CC_O=$CC_O CC_O=$CC_O
CXX_C=$CXX_C CXX_C=$CXX_C
CXX_O=$CXX_O CXX_O=$CXX_O
NVCC_C=$NVCC_C
NVCC_O=$NVCC_O
LD_O=$LD_O LD_O=$LD_O
LD_LIB=$LD_LIB LD_LIB=$LD_LIB
LD_PATH=$LD_PATH LD_PATH=$LD_PATH
......
...@@ -15,7 +15,7 @@ ifndef SUBDIR ...@@ -15,7 +15,7 @@ ifndef SUBDIR
ifndef V ifndef V
Q = @ Q = @
ECHO = printf "$(1)\t%s\n" $(2) ECHO = printf "$(1)\t%s\n" $(2)
BRIEF = CC CXX OBJCC HOSTCC HOSTLD AS YASM AR LD STRIP CP WINDRES BRIEF = CC CXX OBJCC HOSTCC HOSTLD AS YASM AR LD STRIP CP WINDRES NVCC
SILENT = DEPCC DEPHOSTCC DEPAS DEPYASM RANLIB RM SILENT = DEPCC DEPHOSTCC DEPAS DEPYASM RANLIB RM
MSG = $@ MSG = $@
...@@ -38,6 +38,7 @@ OBJCCFLAGS = $(CPPFLAGS) $(CFLAGS) $(OBJCFLAGS) ...@@ -38,6 +38,7 @@ OBJCCFLAGS = $(CPPFLAGS) $(CFLAGS) $(OBJCFLAGS)
ASFLAGS := $(CPPFLAGS) $(ASFLAGS) ASFLAGS := $(CPPFLAGS) $(ASFLAGS)
CXXFLAGS := $(CPPFLAGS) $(CFLAGS) $(CXXFLAGS) CXXFLAGS := $(CPPFLAGS) $(CFLAGS) $(CXXFLAGS)
YASMFLAGS += $(IFLAGS:%=%/) -Pconfig.asm YASMFLAGS += $(IFLAGS:%=%/) -Pconfig.asm
NVCCFLAGS += -ptx
HOSTCCFLAGS = $(IFLAGS) $(HOSTCPPFLAGS) $(HOSTCFLAGS) HOSTCCFLAGS = $(IFLAGS) $(HOSTCPPFLAGS) $(HOSTCFLAGS)
LDFLAGS := $(ALLFFLIBS:%=$(LD_PATH)lib%) $(LDFLAGS) LDFLAGS := $(ALLFFLIBS:%=$(LD_PATH)lib%) $(LDFLAGS)
...@@ -52,6 +53,7 @@ COMPILE_CXX = $(call COMPILE,CXX) ...@@ -52,6 +53,7 @@ COMPILE_CXX = $(call COMPILE,CXX)
COMPILE_S = $(call COMPILE,AS) COMPILE_S = $(call COMPILE,AS)
COMPILE_M = $(call COMPILE,OBJCC) COMPILE_M = $(call COMPILE,OBJCC)
COMPILE_HOSTC = $(call COMPILE,HOSTCC) COMPILE_HOSTC = $(call COMPILE,HOSTCC)
COMPILE_NVCC = $(call COMPILE,NVCC)
%.o: %.c %.o: %.c
$(COMPILE_C) $(COMPILE_C)
...@@ -89,6 +91,12 @@ COMPILE_HOSTC = $(call COMPILE,HOSTCC) ...@@ -89,6 +91,12 @@ COMPILE_HOSTC = $(call COMPILE,HOSTCC)
%.h.c: %.h.c:
$(Q)echo '#include "$*.h"' >$@ $(Q)echo '#include "$*.h"' >$@
# Build a .ptx file from the matching .cu source using the NVCC compile command.
%.ptx: %.cu
$(COMPILE_NVCC)
# Generate a .ptx.c file from a .ptx file by running compat/cuda/ptx2c.sh.
# The script receives the target as its first argument and the .ptx
# prerequisite as its second, with a leading $(SRC_PATH)/ rewritten to
# $(SRC_LINK)/.
%.ptx.c: %.ptx
$(Q)sh $(SRC_PATH)/compat/cuda/ptx2c.sh $@ $(patsubst $(SRC_PATH)/%,$(SRC_LINK)/%,$<)
%.c %.h %.pc %.ver %.version: TAG = GEN %.c %.h %.pc %.ver %.version: TAG = GEN
# Dummy rule to stop make trying to rebuild removed or renamed headers # Dummy rule to stop make trying to rebuild removed or renamed headers
...@@ -133,9 +141,10 @@ ALLHEADERS := $(subst $(SRC_DIR)/,$(SUBDIR),$(wildcard $(SRC_DIR)/*.h $(SRC_DIR) ...@@ -133,9 +141,10 @@ ALLHEADERS := $(subst $(SRC_DIR)/,$(SUBDIR),$(wildcard $(SRC_DIR)/*.h $(SRC_DIR)
SKIPHEADERS += $(ARCH_HEADERS:%=$(ARCH)/%) $(SKIPHEADERS-) SKIPHEADERS += $(ARCH_HEADERS:%=$(ARCH)/%) $(SKIPHEADERS-)
SKIPHEADERS := $(SKIPHEADERS:%=$(SUBDIR)%) SKIPHEADERS := $(SKIPHEADERS:%=$(SUBDIR)%)
HOBJS = $(filter-out $(SKIPHEADERS:.h=.h.o),$(ALLHEADERS:.h=.h.o)) HOBJS = $(filter-out $(SKIPHEADERS:.h=.h.o),$(ALLHEADERS:.h=.h.o))
PTXOBJS = $(filter %.ptx.o,$(OBJS))
$(HOBJS): CCFLAGS += $(CFLAGS_HEADERS) $(HOBJS): CCFLAGS += $(CFLAGS_HEADERS)
checkheaders: $(HOBJS) checkheaders: $(HOBJS)
.SECONDARY: $(HOBJS:.o=.c) .SECONDARY: $(HOBJS:.o=.c) $(PTXOBJS:.o=.c) $(PTXOBJS:.o=)
alltools: $(TOOLS) alltools: $(TOOLS)
...@@ -154,7 +163,7 @@ $(TOOLOBJS): | tools ...@@ -154,7 +163,7 @@ $(TOOLOBJS): | tools
OBJDIRS := $(OBJDIRS) $(dir $(OBJS) $(HOBJS) $(HOSTOBJS) $(SLIBOBJS) $(TESTOBJS)) OBJDIRS := $(OBJDIRS) $(dir $(OBJS) $(HOBJS) $(HOSTOBJS) $(SLIBOBJS) $(TESTOBJS))
CLEANSUFFIXES = *.d *.o *~ *.h.c *.gcda *.gcno *.map *.ver *.version *.ho *$(DEFAULT_YASMD).asm CLEANSUFFIXES = *.d *.o *~ *.h.c *.gcda *.gcno *.map *.ver *.version *.ho *$(DEFAULT_YASMD).asm *.ptx *.ptx.c
DISTCLEANSUFFIXES = *.pc DISTCLEANSUFFIXES = *.pc
LIBSUFFIXES = *.a *.lib *.so *.so.* *.dylib *.dll *.def *.dll.a LIBSUFFIXES = *.a *.lib *.so *.so.* *.dylib *.dll *.def *.dll.a
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment