CPU usage explodes when using VASP 6.2 CPU version compiled with NVIDIA HPC-SDK (OpenACC)
Posted: Sun May 16, 2021 9:55 am
I'm experiencing a weird problem when running vasp.6.2.0 vasp_std built with the NVIDIA HPC-SDK compilers (hpc-sdk version 21.3).
The CPU usage blows up for each MPI process, as shown in the picture below.
stdout also shows messages that I have not seen with the GPU-compiled version or with vasp5.
To run, I added two directories to LD_LIBRARY_PATH:
/opt/nvidia/hpc_sdk/Linux_x86_64/21.3/comm_libs/openmpi/openmpi-3.1.5/lib
/opt/nvidia/hpc_sdk/Linux_x86_64/21.3/compilers/extras/qd/lib
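In shell terms, the setup looks roughly like this (the launch line is only a sketch; the process count and binary path are placeholders, not the exact values I used):

export LD_LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/21.3/comm_libs/openmpi/openmpi-3.1.5/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/21.3/compilers/extras/qd/lib:$LD_LIBRARY_PATH
# process count and path below are placeholders
mpirun -np 4 /path/to/vasp.6.2.0/bin/vasp_std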
Is this normal?
I'm posting on this board since I think the issue might be compiler-related.
The job does run to the end.
Any advice would be appreciated.
Thank you in advance.
------------------------
makefile.include
------------------------
# Precompiler options
CPP_OPTIONS= -DHOST=\"LinuxPGI\" \
-DMPI -DMPI_BLOCK=8000 -DMPI_INPLACE -Duse_collective \
-DscaLAPACK \
-DCACHE_SIZE=4000 \
-Davoidalloc \
-Dvasp6 \
-Duse_bse_te \
-Dtbdyn \
-Dqd_emulate \
-Dfock_dblbuf
nvpath = /opt/nvidia/hpc_sdk/Linux_x86_64/21.3/compilers/bin
CPP = $(nvpath)/nvfortran -Mpreprocess -Mfree -Mextend -E $(CPP_OPTIONS) $*$(FUFFIX) > $*$(SUFFIX)
FC = mpif90
FCL = mpif90 -c++libs
FREE = -Mfree
FFLAGS = -Mbackslash -Mlarge_arrays
OFLAG = -fast
DEBUG = -Mfree -O0 -traceback
# Specify your NV HPC-SDK installation, try to set NVROOT automatically
NVROOT =$(shell which nvfortran | awk -F /compilers/bin/nvfortran '{ print $$1 }')
# ...or set NVROOT manually
NVHPC ?= /opt/nvidia/hpc_sdk
NVVERSION = 21.3
NVROOT = $(NVHPC)/Linux_x86_64/$(NVVERSION)
# Use NV HPC-SDK provided BLAS and LAPACK libraries
BLAS = -lblas
LAPACK = -llapack
BLACS =
SCALAPACK = -Mscalapack
LLIBS = $(SCALAPACK) $(LAPACK) $(BLAS)
# Software emulation of quadruple precision
QD ?= $(NVROOT)/compilers/extras/qd
LLIBS += -L$(QD)/lib -lqdmod -lqd
INCS += -I$(QD)/include/qd
# Use the FFTs from fftw
FFTW ?= /xtmp/khs/fftw3/fftw-3.3.8
LLIBS += -L$(FFTW)/lib -lfftw3
INCS += -I$(FFTW)/include
OBJECTS = fftmpiw.o fftmpi_map.o fftw3d.o fft3dlib.o
# Redefine the standard list of O1 and O2 objects
SOURCE_O1 := pade_fit.o
SOURCE_O2 := pead.o
# For what used to be vasp.5.lib
CPP_LIB = $(CPP)
FC_LIB = nvfortran
CC_LIB = nvc
CFLAGS_LIB = -O
FFLAGS_LIB = -O1 -Mfixed
FREE_LIB = $(FREE)
OBJECTS_LIB= linpack_double.o getshmem.o
# For the parser library
CXX_PARS = nvc++ --no_warnings
# Normally no need to change this
SRCDIR = ../../src
BINDIR = ../../bin
#================================================
# GPU Stuff
CPP_GPU = -DCUDA_GPU -DRPROMU_CPROJ_OVERLAP -DCUFFT_MIN=28 -UscaLAPACK -Ufock_dblbuf # -DUSE_PINNED_MEMORY
OBJECTS_GPU= fftmpiw.o fftmpi_map.o fft3dlib.o fftw3d_gpu.o fftmpiw_gpu.o
CC = nvc
CXX = nvc++
CFLAGS = -fPIC -DADD_ -mp -cuda -DMAGMA_WITH_MKL -DMAGMA_SETAFFINITY -DGPUSHMEM=300 -DHAVE_CUBLAS
## Use a custom CUDA installation: minimal requirement is CUDA >= 10.X. For "sm_80" you need CUDA >= 11.X.
#CUDA_ROOT ?= /usr/local/cuda
#NVCC := $(CUDA_ROOT)/bin/nvcc
#CUDA_LIB := -L$(CUDA_ROOT)/lib64 -lnvToolsExt -lcudart -lcuda -lcufft -lcublas
# Or use the CUDA installation from the NV HPC-SDK
#CUDA_ROOT ?= $(NVROOT)/cuda/11.0
NVCC := nvcc
CUDA_LIB = -cudalib=cublas,cufft -lnvToolsExt
GENCODE_ARCH := -gencode=arch=compute_60,code=\"sm_60,compute_60\" \
-gencode=arch=compute_70,code=\"sm_70,compute_70\" \
-gencode=arch=compute_80,code=\"sm_80,compute_80\"
MPI_INC = $(NVROOT)/comm_libs/mpi/include
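For completeness, the binary was built with the standard VASP 6 make targets, something like:

make veryclean
make std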