We recently set out to compile VASP 6.2.0 with OpenACC support. Several of our GPUs are RTX A6000 cards, which require CUDA 11.1 or newer. The compilation environment I'm using is the PGI compilers provided by NVHPC SDK 21.2, whose OpenACC toolchain comes with CUDA 11.2:
- nvfortran 21.2-0 LLVM 64-bit target on x86-64 Linux -tp zen
Starting from the makefile.include.linux_pgi_acc example provided in the arch/ folder, the changes I made to adapt it to CUDA 11.2 are:
Code: Select all
FC = mpif90 -acc -ta=tesla:cc35,cc50,cc60,cc70,cc75,cc80,cuda11.2
FCL = mpif90 -acc -ta=tesla:cc35,cc50,cc60,cc70,cc75,cc80,cuda11.2 -pgc++libs
With these settings the compilation fails with the following error:
Code: Select all
!dbg attachment points at wrong subprogram for function
!7278 = distinct !DISubprogram(name: "corpbesol_", linkageName: "corpbesol_", scope: !3, file: !4, line: 6464, type: !17, isLocal: false, isDefinition: true, scopeLine: 6464, isOptimized: false, unit: !3)
void (double*, double*, double*, double*, double*, i32*)* @"ggaall_$31"
%li2 = load i32, i32 addrspace(1)* getelementptr inbounds (%common._setexm_struct_def_21, %common._setexm_struct_def_21 addrspace(1)* @_setexm_struct_def_21, i64 0, i32 0), align 16, !dbg !18
!18 = !DILocation(line: 31, column: 1, scope: !19)
!19 = !DILexicalBlock(scope: !20, file: !4, line: 17, column: 1)
!20 = distinct !DISubprogram(name: "ggaall_", linkageName: "ggaall_", scope: !3, file: !4, line: 17, type: !17, isLocal: false, isDefinition: true, scopeLine: 17, isOptimized: false, unit: !3)
LLVM ERROR: Broken module found, compilation aborted!
NVFORTRAN-S-0155-Compiler failed to translate accelerator region (see -Minfo messages): Device compiler exited with error status code (xclib_grad.f90: 6524)
0 inform, 0 warnings, 1 severes, 0 fatal for
I would very much appreciate your help.
The complete makefile.include is as follows:
Code: Select all
# Precompiler options
# Preprocessor macro definitions; they are handed to the preprocessing
# command through $(CPP_OPTIONS) in the CPP variable below.
# The list ends with -D_OPENACC and -DUSENCCL (presumably the switches that
# enable the OpenACC code path and NCCL communication -- confirm against the
# VASP documentation).
CPP_OPTIONS= -DHOST=\"LinuxPGI\" \
-DMPI -DMPI_BLOCK=8000 -DMPI_INPLACE -Duse_collective \
-DscaLAPACK \
-DCACHE_SIZE=4000 \
-Davoidalloc \
-Dvasp6 \
-Duse_bse_te \
-Dtbdyn \
-Dqd_emulate \
-Dfock_dblbuf \
-D_OPENACC \
-DUSENCCL
# Preprocessing command: pgfortran is run on $*$(FUFFIX) and its output is
# redirected into $*$(SUFFIX). FUFFIX and SUFFIX are not defined in this
# file; they are expected to come from the makefile that includes it.
CPP = pgfortran -Mpreprocess -Mfree -Mextend -E $(CPP_OPTIONS) $*$(FUFFIX) > $*$(SUFFIX)
# Fortran compile (FC) and link (FCL) commands. Both go through the mpif90
# wrapper with -acc and a -gpu list of compute capabilities (cc35 ... cc80)
# plus cuda11.2; the link command additionally passes -pgc++libs.
FC = mpif90 -acc -gpu=cc35,cc50,cc60,cc70,cc75,cc80,cuda11.2
FCL = mpif90 -acc -gpu=cc35,cc50,cc60,cc70,cc75,cc80,cuda11.2 -pgc++libs
# Flag sets: FREE (free-form source), FFLAGS (general Fortran flags),
# OFLAG (optimization) and DEBUG (unoptimized build with -traceback).
FREE = -Mfree
FFLAGS = -Mnoupcase -Mbackslash -Mlarge_arrays
OFLAG = -fast
DEBUG = -Mfree -O0 -traceback
# Use PGI provided BLAS and LAPACK libraries
BLAS = -lblas
LAPACK = -llapack
# BLACS is intentionally left empty; ScaLAPACK is requested via -Mscalapack.
BLACS =
SCALAPACK = -Mscalapack
# CUDA libraries requested from the compiler driver: cuBLAS, cuFFT, cuSOLVER.
CUDA = -Mcudalib=cublas -Mcudalib=cufft -Mcudalib=cusolver -Mcuda
# Link line is assembled here and extended with += in the sections below.
LLIBS = $(SCALAPACK) $(LAPACK) $(BLAS) $(CUDA)
# NCCL library directory; ?= only applies when NCCL is not already set
# (e.g. in the environment or on the make command line).
NCCL ?= /opt/nvidia/hpc_sdk/Linux_x86_64/21.2/comm_libs/nccl/lib
LLIBS += -L$(NCCL) -lnccl
# Software emulation of quadruple precision
# QD installation prefix; overridable in the same way as NCCL above.
QD ?= /opt/nvidia/hpc_sdk/Linux_x86_64/21.2/compilers/extras/qd
LLIBS += -L$(QD)/lib -lqdmod -lqd
INCS += -I$(QD)/include/qd
# Use the FFTs from fftw
# The FFTW prefix and its include path are commented out, so -lfftw3 is
# linked without an explicit -L and no FFTW include directory is added here.
#FFTW ?= /opt/pgi/fftw-3.3.8
LLIBS += -lfftw3
#INCS +=
# FFT-related object files added to the build.
OBJECTS = fftmpiw.o fftmpi_map.o fftw3d.o fft3dlib.o
# Redefine the standard list of O1 and O2 objects
SOURCE_O1 := pade_fit.o
SOURCE_O2 := pead.o
# Workaround a bug in PGI compiler up to and including version 18.10
# (disabled: all lines of the workaround are commented out)
#OFLAG_IN = -fast -ta=tesla:nollvm
#OFLAG_IN = -fast -ta=tesla:nollvm
#SOURCE_IN := xcspin.o
# For what used to be vasp.5.lib
# The library part is built with the plain pgfortran/pgcc compilers rather
# than the mpif90 wrapper used for FC above.
CPP_LIB = $(CPP)
FC_LIB = pgfortran
CC_LIB = pgcc
CFLAGS_LIB = -O
FFLAGS_LIB = -O1 -Mfixed
FREE_LIB = $(FREE)
OBJECTS_LIB= linpack_double.o getshmem.o
# For the parser library
CXX_PARS = pgc++ --no_warnings
# Normally no need to change this
# Source and binary directories, given relative to the build directory.
SRCDIR = ../../src
BINDIR = ../../bin
# MPI include directory of the NVHPC SDK 21.2 installation.
MPI_INC = /opt/nvidia/hpc_sdk/Linux_x86_64/21.2/comm_libs/mpi/include