-
Notifications
You must be signed in to change notification settings - Fork 68
Openacc test main #693
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Openacc test main #693
Changes from all commits
bb16869
8d77d94
5e5d394
729aa7f
980a688
6613e88
1c53781
5af17ab
90e19b6
2acb0c3
714a8ee
2a1f5b4
108b152
5b7447f
90e1cbc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,7 +9,7 @@ module load openmpi/4.1.2-intel-2021.5.0 | |
| export FC=mpif90 CC=mpicc CXX=mpicxx ; | ||
| spack load intel-oneapi-mkl@2022.0.1%gcc@11.2.0 # this handles adding to path elegantly then using hardcoded path below | ||
| #module load intel-oneapi-mkl/2022.0.1-gcc-11.2.0 | ||
| #export LD_LIBRARY_PATH=/sw/spack-levante/intel-oneapi-mkl-2022.0.1-ttdktf/mkl/2022.0.1/lib/intel64:$LD_LIBRARY_PATH | ||
| export LD_LIBRARY_PATH=/sw/spack-levante/intel-oneapi-mkl-2022.0.1-ttdktf/mkl/2022.0.1/lib/intel64:$LD_LIBRARY_PATH | ||
|
|
||
| module load netcdf-c/4.8.1-openmpi-4.1.2-intel-2021.5.0 | ||
| module load netcdf-fortran/4.5.3-openmpi-4.1.2-intel-2021.5.0 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -243,7 +243,7 @@ subroutine do_oce_adv_tra(dt, vel, w, wi, we, tr_num, dynamics, tracers, partit, | |
| #ifndef ENABLE_OPENACC | ||
| !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(n, nz) | ||
| #else | ||
| !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) | ||
| !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) | ||
| #endif | ||
| !$OMP DO | ||
| do n=1, myDim_edge2D | ||
|
|
@@ -254,8 +254,12 @@ subroutine do_oce_adv_tra(dt, vel, w, wi, we, tr_num, dynamics, tracers, partit, | |
| !$ACC END LOOP | ||
| end do | ||
| !$OMP END DO | ||
| #ifndef ENABLE_OPENACC | ||
| #else | ||
| !$ACC END PARALLEL LOOP | ||
| !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) | ||
| #endif | ||
|
|
||
| !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) | ||
| !$OMP DO | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The `#ifndef ENABLE_OPENACC` guard is missing here and in a few OpenMP blocks below. I am not sure OpenMP and OpenACC will work together in this file, but if fixing it is too much for this PR, we can defer it to the next PR you intend to open. |
||
| do n=1, myDim_nod2D | ||
| !$ACC LOOP VECTOR | ||
|
|
@@ -351,7 +355,7 @@ subroutine do_oce_adv_tra(dt, vel, w, wi, we, tr_num, dynamics, tracers, partit, | |
| !_______________________________________________________________________ | ||
| if (trim(tracers%data(tr_num)%tra_adv_lim)=='FCT') then | ||
| !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(n, nz) | ||
| !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) | ||
| !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) | ||
| !$OMP DO | ||
| do n=1, myDim_edge2D | ||
| !$ACC LOOP VECTOR | ||
|
|
@@ -365,7 +369,7 @@ subroutine do_oce_adv_tra(dt, vel, w, wi, we, tr_num, dynamics, tracers, partit, | |
| !$OMP END DO | ||
| !$ACC END PARALLEL LOOP | ||
|
|
||
| !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) | ||
| !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) | ||
| !$OMP DO | ||
| do n=1, myDim_nod2D | ||
| !$ACC LOOP VECTOR | ||
|
|
@@ -384,7 +388,7 @@ subroutine do_oce_adv_tra(dt, vel, w, wi, we, tr_num, dynamics, tracers, partit, | |
| !_______________________________________________________________________ | ||
| else | ||
| !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(n, nz) | ||
| !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) | ||
| !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) | ||
| !$OMP DO | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This OpenMP directive is not guarded, meaning we can't use OpenMP together with OpenACC for other parts. We can defer this to the next PR if you want to. |
||
| do n=1, myDim_edge2D | ||
| !$ACC LOOP VECTOR | ||
|
|
@@ -396,7 +400,7 @@ subroutine do_oce_adv_tra(dt, vel, w, wi, we, tr_num, dynamics, tracers, partit, | |
| !$OMP END DO | ||
| !$ACC END PARALLEL LOOP | ||
|
|
||
| !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) | ||
| !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) | ||
| !$OMP DO | ||
| do n=1, myDim_nod2D | ||
| !$ACC LOOP VECTOR | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -25,7 +25,9 @@ SUBROUTINE init_tracers_AB(tr_num, tracers, partit, mesh) | |
| type(t_tracer), intent(inout), target :: tracers | ||
| integer :: n,nz | ||
|
|
||
| #ifdef ENABLE_OPENACC | ||
| !$ACC parallel loop collapse(2) default(present) !!!async(1) | ||
| #endif | ||
| do n=1, partit%myDim_nod2D+partit%eDim_nod2D | ||
| do nz=1, mesh%nl-1 | ||
| ! del_ttf will contain all advection / diffusion contributions for this tracer. Set it to 0 at the beginning! | ||
|
|
@@ -34,7 +36,9 @@ SUBROUTINE init_tracers_AB(tr_num, tracers, partit, mesh) | |
| tracers%work%del_ttf_advvert (nz, n) = 0.0_WP | ||
| end do | ||
| end do | ||
| #ifdef ENABLE_OPENACC | ||
| !$ACC end parallel loop | ||
| #endif | ||
| !$OMP PARALLEL DO | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This OpenMP directive is not guarded, meaning we can't use OpenMP together with OpenACC for other parts. We can defer this to the next PR if you want to. |
||
| do n=1, partit%myDim_nod2D+partit%eDim_nod2D | ||
| ! AB interpolation | ||
|
|
@@ -220,4 +224,4 @@ SUBROUTINE relax_to_clim(tr_num, tracers, partit, mesh) | |
| !$OMP END PARALLEL DO | ||
| END IF | ||
| END SUBROUTINE relax_to_clim | ||
| END MODULE o_tracers | ||
| END MODULE o_tracers | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,75 @@ | ||
| #!/bin/bash | ||
| #SBATCH --job-name=fesom_gpu_test | ||
| #SBATCH --partition=gpu | ||
| #SBATCH --nodes=4 # Specify number of nodes | ||
| #SBATCH --ntasks-per-node=4 | ||
| #SBATCH --cpus-per-task=4 | ||
| #SBATCH --gpus=16 # 4 # 8 for 2 nodes | ||
| #SBATCH --gpus-per-task=1 #specific case when tasks=gpues | ||
| #SBATCH --exclusive | ||
| #SBATCH --mem=0 # Request all memory available on all nodes | ||
| #SBATCH --time=00:20:00 # Set a limit on the total run time | ||
| #SBATCH -o slurm.out | ||
| #SBATCH -e slurm.err | ||
| #SBATCH --account=ab0995 | ||
|
|
||
| set -e | ||
| export SLURM_CPUS_PER_TASK=4 | ||
|
|
||
| source /sw/etc/profile.levante | ||
| #source ../env/levante.dkrz.de/shell | ||
| read -r USED_SHELL <../bin/current_shell_path | ||
| source $USED_SHELL | ||
|
|
||
| #source /work/ab0995/a270232/refactoring/fesom2/env/levante.dkrz.de/shell.nvhpc | ||
| echo "using environment from" $USED_SHELL | ||
|
|
||
| ulimit -s 204800 # https://docs.dkrz.de/doc/levante/running-jobs/runtime-settings.html | ||
|
|
||
| echo Submitted job: $jobid | ||
| squeue -u $USER | ||
|
|
||
| # Check GPUs available for the job | ||
| nvidia-smi | ||
|
|
||
| # determine JOBID | ||
| JOBID=$(echo $SLURM_JOB_ID | cut -d"." -f1) | ||
|
|
||
| rm -f fesom.x | ||
| ln -s ../bin/fesom.x . # cp -n ../bin/fesom.x | ||
|
|
||
| export OMP_NUM_THREADS=4 | ||
| cp -n ../config/namelist.config . | ||
| cp -n ../config/namelist.forcing . | ||
| cp -n ../config/namelist.oce . | ||
| cp -n ../config/namelist.ice . | ||
| cp -n ../config/namelist.icepack . | ||
| cp -n ../config/namelist.tra . | ||
| cp -n ../config/namelist.io . | ||
| cp -n ../config/namelist.cvmix . | ||
| cp -n ../config/namelist.dyn . | ||
|
|
||
| ## levante specific gpu env used for ICON otherwise segfault | ||
| export OMPI_MCA_pml=ucx # Use UCX to support InfiniBand devices and CUDA [1] | ||
|
|
||
| export OMPI_MCA_btl="self" # Only use self transport to reduce overhead [2] | ||
|
|
||
| export UCX_RNDV_SCHEME=put_zcopy # Preferred communication scheme with Rendezvous protocol | ||
| export UCX_RNDV_THRESH=16384 # Threshold when to switch transport from TCP to NVLINK [3] | ||
|
|
||
| export UCX_IB_GPU_DIRECT_RDMA=yes # Allow remote direct memory access from/to GPU | ||
|
|
||
| export UCX_TLS=cma,rc,mm,cuda_ipc,cuda_copy,gdr_copy # Include cuda and gdr based transport layers for communication [4] | ||
|
|
||
| export UCX_MEMTYPE_CACHE=n | ||
|
|
||
| date | ||
| srun -l fesom.x >fesom2.out 2>&1 #> "fesom2.0.out" 2>&1 | ||
| # srun -l nsys profile -t cuda,osrt,mpi fesom.x > fesom2.out 2>&1 #> "fesom2.0.out" 2>&1 | ||
| date | ||
|
|
||
| # qstat -f $PBS_JOBID | ||
| #export EXITSTATUS=$? | ||
| #if [ ${EXITSTATUS} -eq 0 ] || [ ${EXITSTATUS} -eq 127 ] ; then | ||
| #sbatch job_mistral | ||
| #fi |
Uh oh!
There was an error while loading. Please reload this page.