VASP 6.2 GPU compilation issues
Posted: Wed May 12, 2021 12:10 pm
We compiled the GPU version of VASP 6.2.0. If we run it with 1 MPI task and 1 GPU, all tests pass. However, if we run it with more than one MPI task (we tried 2 MPI tasks with 2 GPUs and 4 MPI tasks with 4 GPUs), 16 test jobs fail with a segmentation fault (no core dumped); the rest pass (at least, they produce no wrong results). We are not really sure how to proceed.
We uploaded the failed test jobs here: https://file.io/CzdnfRbSpowr (without WAVECAR etc., because uploading them here failed with "file type not supported"), together with the output of the whole test package (run.out). Any suggestions?
Below you can find the textual details:
Hardware:
2x Intel(R) Xeon(R) Gold 6242 CPU @ 2.80GHz
12x 32 GB DDR4 2933 MT/s DIMM
4x NVIDIA Corporation GV100GL [Tesla V100 SXM2 32GB] (rev a1)
Toolchains (All components compiled with NVHPC):
NVHPC 20.9
CUDA 11.0.2
OpenMPI 4.0.3 (NVHPC bundled OpenMPI is not an option, due to the incompatible UCX)
QD 2.3.17
Intel MKL 2020.1.217
NVHPC 21.2
CUDA 11.2.1
OpenMPI 4.0.5 (NVHPC bundled OpenMPI is not an option, due to the incompatible UCX)
QD 2.3.17
Intel MKL 2020.4.304
makefile:
# Build settings for an OpenACC (GPU) build with the NVIDIA HPC SDK compilers,
# an MPI compiler wrapper (mpif90), Intel MKL and the QD library.
# This fragment only defines variables. FUFFIX and SUFFIX are not set here and
# must be provided by the makefile that includes this fragment.

# Precompiler options
CPP_OPTIONS= -DHOST=\"LinuxPGI\" \
             -DMPI -DMPI_BLOCK=8000 -DMPI_INPLACE -Duse_collective \
             -DscaLAPACK \
             -DCACHE_SIZE=4000 \
             -Davoidalloc \
             -Dvasp6 \
             -Duse_bse_te \
             -Dtbdyn \
             -Dqd_emulate \
             -Dfock_dblbuf \
             -D_OPENACC \
             -DUSENCCL -DUSENCCLP2P

# Must stay a recursive (=) assignment: $* is an automatic variable and only
# has a value when CPP is expanded inside a rule.
CPP        = nvfortran -Mpreprocess -Mfree -Mextend -E $(CPP_OPTIONS) $*$(FUFFIX) > $*$(SUFFIX)

# Sub-options handed verbatim to -gpu= (compute capability, CUDA toolkit).
# Defined once so that FC and FCL cannot drift apart. The default reproduces
# the previous hard-coded value; override it on the command line when building
# against another toolkit, e.g. `make ACC_GPU_TARGET=cc70,cuda11.2`.
ACC_GPU_TARGET = cc70,cuda11.0

FC         = mpif90 -acc -gpu=$(ACC_GPU_TARGET)
# Same as FC plus -c++libs.
FCL        = mpif90 -acc -gpu=$(ACC_GPU_TARGET) -c++libs

FREE       = -Mfree

FFLAGS     = -Mbackslash -Mlarge_arrays

OFLAG      = -fast

DEBUG      = -Mfree -O0 -traceback

# Specify your NV HPC-SDK installation, try to set NVROOT automatically
#NVROOT =$(shell which nvfortran | awk -F /compilers/bin/nvfortran '{ print $$1 }')
# ...or set NVROOT manually
#NVHPC ?= /opt/nvidia/hpc_sdk
#NVVERSION = 20.9
#NVROOT = $(NVHPC)/Linux_x86_64/$(NVVERSION)

# NV HPC-SDK provided BLAS, LAPACK and ScaLAPACK.
# NOTE: these four variables are only used by the commented-out LLIBS line
# below; the active LLIBS links Intel MKL instead and does not reference them.
BLAS       = -lblas
LAPACK     = -llapack

BLACS      =
SCALAPACK  = -Mscalapack

CUDA       = -cudalib=cublas,cusolver,cufft,nccl -cuda

# MKLROOT is not set in this fragment. If it is empty, the -L/-I paths below
# silently degrade to /lib/intel64 and /include/fftw, so report it early.
ifeq ($(strip $(MKLROOT)),)
$(warning MKLROOT is empty: the MKL library and include paths will be wrong)
endif

#LLIBS = $(SCALAPACK) $(LAPACK) $(BLAS) $(CUDA)
# Intel MKL: ScaLAPACK, LP64 interface, sequential threading layer, and the
# BLACS layer built for OpenMPI, followed by the CUDA libraries.
LLIBS      = -L$(MKLROOT)/lib/intel64 \
             -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core \
             -lmkl_blacs_openmpi_lp64 \
             -lpthread -lm -ldl \
             $(CUDA)

# Software emulation of quadruple precision
# QD falls back to EBROOTQD, which is not set in this fragment either.
QD         ?= $(EBROOTQD)
ifeq ($(strip $(QD)),)
$(warning QD is empty: set QD or EBROOTQD, otherwise -L/lib and -I/include/qd are used)
endif
LLIBS      += -L$(QD)/lib -lqdmod -lqd
INCS       += -I$(QD)/include/qd

# FFTW3 interface headers from the MKL installation. The stand-alone FFTW
# settings are kept for reference only; no separate -lfftw3 is linked here
# (presumably the FFTW3 entry points are resolved by the MKL libraries in
# LLIBS; verify against your MKL version).
#FFTW ?= /opt/gnu/fftw-3.3.6-pl2-GNU-5.4.0
#LLIBS += -L$(FFTW)/lib -lfftw3
#INCS += -I$(FFTW)/include
INCS       += -I$(MKLROOT)/include/fftw

OBJECTS    = fftmpiw.o fftmpi_map.o fftw3d.o fft3dlib.o

# Redefine the standard list of O1 and O2 objects
SOURCE_O1  := pade_fit.o
SOURCE_O2  := pead.o

# For what used to be vasp.5.lib
CPP_LIB    = $(CPP)
FC_LIB     = nvfortran
CC_LIB     = nvc
CFLAGS_LIB = -O
FFLAGS_LIB = -O1 -Mfixed
FREE_LIB   = $(FREE)

OBJECTS_LIB= linpack_double.o getshmem.o

# For the parser library
CXX_PARS   = nvc++ --no_warnings

# Normally no need to change this
SRCDIR     = ../../src
BINDIR     = ../../bin
We uploaded the failed test jobs here: https://file.io/CzdnfRbSpowr (without WAVECAR etc., because uploading them here failed with "file type not supported"), together with the output of the whole test package (run.out). Any suggestions?
Below you can find the textual details:
Hardware:
2x Intel(R) Xeon(R) Gold 6242 CPU @ 2.80GHz
12x 32 GB DDR4 2933 MT/s DIMM
4x NVIDIA Corporation GV100GL [Tesla V100 SXM2 32GB] (rev a1)
Toolchains (All components compiled with NVHPC):
NVHPC 20.9
CUDA 11.0.2
OpenMPI 4.0.3 (NVHPC bundled OpenMPI is not an option, due to the incompatible UCX)
QD 2.3.17
Intel MKL 2020.1.217
NVHPC 21.2
CUDA 11.2.1
OpenMPI 4.0.5 (NVHPC bundled OpenMPI is not an option, due to the incompatible UCX)
QD 2.3.17
Intel MKL 2020.4.304
makefile:
# Build settings for an OpenACC (GPU) build with the NVIDIA HPC SDK compilers,
# an MPI compiler wrapper (mpif90), Intel MKL and the QD library.
# This fragment only defines variables. FUFFIX and SUFFIX are not set here and
# must be provided by the makefile that includes this fragment.

# Precompiler options
CPP_OPTIONS= -DHOST=\"LinuxPGI\" \
             -DMPI -DMPI_BLOCK=8000 -DMPI_INPLACE -Duse_collective \
             -DscaLAPACK \
             -DCACHE_SIZE=4000 \
             -Davoidalloc \
             -Dvasp6 \
             -Duse_bse_te \
             -Dtbdyn \
             -Dqd_emulate \
             -Dfock_dblbuf \
             -D_OPENACC \
             -DUSENCCL -DUSENCCLP2P

# Must stay a recursive (=) assignment: $* is an automatic variable and only
# has a value when CPP is expanded inside a rule.
CPP        = nvfortran -Mpreprocess -Mfree -Mextend -E $(CPP_OPTIONS) $*$(FUFFIX) > $*$(SUFFIX)

# Sub-options handed verbatim to -gpu= (compute capability, CUDA toolkit).
# Defined once so that FC and FCL cannot drift apart. The default reproduces
# the previous hard-coded value; override it on the command line when building
# against another toolkit, e.g. `make ACC_GPU_TARGET=cc70,cuda11.2`.
ACC_GPU_TARGET = cc70,cuda11.0

FC         = mpif90 -acc -gpu=$(ACC_GPU_TARGET)
# Same as FC plus -c++libs.
FCL        = mpif90 -acc -gpu=$(ACC_GPU_TARGET) -c++libs

FREE       = -Mfree

FFLAGS     = -Mbackslash -Mlarge_arrays

OFLAG      = -fast

DEBUG      = -Mfree -O0 -traceback

# Specify your NV HPC-SDK installation, try to set NVROOT automatically
#NVROOT =$(shell which nvfortran | awk -F /compilers/bin/nvfortran '{ print $$1 }')
# ...or set NVROOT manually
#NVHPC ?= /opt/nvidia/hpc_sdk
#NVVERSION = 20.9
#NVROOT = $(NVHPC)/Linux_x86_64/$(NVVERSION)

# NV HPC-SDK provided BLAS, LAPACK and ScaLAPACK.
# NOTE: these four variables are only used by the commented-out LLIBS line
# below; the active LLIBS links Intel MKL instead and does not reference them.
BLAS       = -lblas
LAPACK     = -llapack

BLACS      =
SCALAPACK  = -Mscalapack

CUDA       = -cudalib=cublas,cusolver,cufft,nccl -cuda

# MKLROOT is not set in this fragment. If it is empty, the -L/-I paths below
# silently degrade to /lib/intel64 and /include/fftw, so report it early.
ifeq ($(strip $(MKLROOT)),)
$(warning MKLROOT is empty: the MKL library and include paths will be wrong)
endif

#LLIBS = $(SCALAPACK) $(LAPACK) $(BLAS) $(CUDA)
# Intel MKL: ScaLAPACK, LP64 interface, sequential threading layer, and the
# BLACS layer built for OpenMPI, followed by the CUDA libraries.
LLIBS      = -L$(MKLROOT)/lib/intel64 \
             -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core \
             -lmkl_blacs_openmpi_lp64 \
             -lpthread -lm -ldl \
             $(CUDA)

# Software emulation of quadruple precision
# QD falls back to EBROOTQD, which is not set in this fragment either.
QD         ?= $(EBROOTQD)
ifeq ($(strip $(QD)),)
$(warning QD is empty: set QD or EBROOTQD, otherwise -L/lib and -I/include/qd are used)
endif
LLIBS      += -L$(QD)/lib -lqdmod -lqd
INCS       += -I$(QD)/include/qd

# FFTW3 interface headers from the MKL installation. The stand-alone FFTW
# settings are kept for reference only; no separate -lfftw3 is linked here
# (presumably the FFTW3 entry points are resolved by the MKL libraries in
# LLIBS; verify against your MKL version).
#FFTW ?= /opt/gnu/fftw-3.3.6-pl2-GNU-5.4.0
#LLIBS += -L$(FFTW)/lib -lfftw3
#INCS += -I$(FFTW)/include
INCS       += -I$(MKLROOT)/include/fftw

OBJECTS    = fftmpiw.o fftmpi_map.o fftw3d.o fft3dlib.o

# Redefine the standard list of O1 and O2 objects
SOURCE_O1  := pade_fit.o
SOURCE_O2  := pead.o

# For what used to be vasp.5.lib
CPP_LIB    = $(CPP)
FC_LIB     = nvfortran
CC_LIB     = nvc
CFLAGS_LIB = -O
FFLAGS_LIB = -O1 -Mfixed
FREE_LIB   = $(FREE)

OBJECTS_LIB= linpack_double.o getshmem.o

# For the parser library
CXX_PARS   = nvc++ --no_warnings

# Normally no need to change this
SRCDIR     = ../../src
BINDIR     = ../../bin