diff --git a/src/gen_modules_diag.F90 b/src/gen_modules_diag.F90 index 702029394..ba94e45de 100644 --- a/src/gen_modules_diag.F90 +++ b/src/gen_modules_diag.F90 @@ -3254,7 +3254,11 @@ subroutine dvd_add_difflux_bhvisc(do_SDdvd, tr_num, dvd_tot, tr, trstar, gamma0_ ! first round !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(edge, nz, ednodes, edelem, elnodes_l, elnodes_r, & !$OMP nu1, nl1, du, dv, dt, len, vi) -!$OMP DO +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else +!$OMP DO +#endif do edge=1, myDim_edge2D!+eDim_edge2D ! skip boundary edges only consider inner edges if (myList_edge2D(edge) > edge2D_in) cycle @@ -3301,7 +3305,11 @@ subroutine dvd_add_difflux_bhvisc(do_SDdvd, tr_num, dvd_tot, tr, trstar, gamma0_ !___________________________________________________________________________ ! second round: +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do edge=1, myDim_edge2D!+eDim_edge2D ! skip boundary edges only consider inner edges if (myList_edge2D(edge) > edge2D_in) cycle diff --git a/src/gen_support.F90 b/src/gen_support.F90 index 01ef9c0c6..8047641cf 100644 --- a/src/gen_support.F90 +++ b/src/gen_support.F90 @@ -337,13 +337,16 @@ subroutine integrate_nod_2D(data, int2D, partit, mesh) #if !defined(__openmp_reproducible) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(row) !$OMP DO REDUCTION (+: lval) -#endif do row=1, myDim_nod2D lval=lval+data(row)*areasvol(ulevels_nod2D(row),row) end do -#if !defined(__openmp_reproducible) !$OMP END DO !$OMP END PARALLEL +#else + ! Use serial computation for reproducible results + do row=1, myDim_nod2D + lval=lval+data(row)*areasvol(ulevels_nod2D(row),row) + end do #endif int2D=0.0_WP call MPI_AllREDUCE(lval, int2D, 1, MPI_DOUBLE_PRECISION, MPI_SUM, & @@ -374,7 +377,12 @@ subroutine integrate_nod_3D(data, int3D, partit, mesh) #include "associate_mesh_ass.h" lval=0.0_WP +#if defined(__openmp_reproducible) +!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(row, k, lval_row) +!$OMP DO ORDERED +#else !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(row, k, lval_row) REDUCTION(+: lval) +#endif do row=1, myDim_nod2D lval_row = 0. do k=ulevels_nod2D(row), nlevels_nod2D(row)-1 @@ -382,13 +390,19 @@ subroutine integrate_nod_3D(data, int3D, partit, mesh) end do #if defined(__openmp_reproducible) !$OMP ORDERED -#endif +!$OMP ATOMIC UPDATE lval = lval + lval_row -#if defined(__openmp_reproducible) !$OMP END ORDERED +#else + lval = lval + lval_row #endif end do +#if defined(__openmp_reproducible) +!$OMP END DO +!$OMP END PARALLEL +#else !$OMP END PARALLEL DO +#endif int3D=0.0_WP call MPI_AllREDUCE(lval, int3D, 1, MPI_DOUBLE_PRECISION, MPI_SUM, & @@ -575,7 +589,7 @@ FUNCTION omp_min_max_sum2(arr, pos11, pos12, pos21, pos22, what, partit, nan) character(3), intent(in) :: what real(kind=WP), optional :: nan !to be implemented upon the need (for masked arrays) real(kind=WP) :: omp_min_max_sum2 - real(kind=WP) :: val, vmasked, val_part(pos11:pos12) + real(kind=WP) :: val, vmasked, val_part(pos21:pos22) integer :: i, j @@ -663,7 +677,12 @@ subroutine integrate_elem_3D(data, int3D, partit, mesh) #include "associate_mesh_ass.h" lval=0.0_WP +#if defined(__openmp_reproducible) +!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(row, k, lval_row) +!$OMP DO ORDERED +#else !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(row, k, lval_row) REDUCTION(+: lval) +#endif do row=1, myDim_elem2D if(elem2D_nodes(1, row) > myDim_nod2D) cycle lval_row = 0. @@ -672,13 +691,19 @@ subroutine integrate_elem_3D(data, int3D, partit, mesh) end do #if defined(__openmp_reproducible) !$OMP ORDERED -#endif +!$OMP ATOMIC UPDATE lval = lval + lval_row -#if defined(__openmp_reproducible) !$OMP END ORDERED +#else + lval = lval + lval_row #endif end do +#if defined(__openmp_reproducible) +!$OMP END DO +!$OMP END PARALLEL +#else !$OMP END PARALLEL DO +#endif int3D=0.0_WP call MPI_AllREDUCE(lval, int3D, 1, MPI_DOUBLE_PRECISION, MPI_SUM, & @@ -707,7 +732,12 @@ subroutine integrate_elem_2D(data, int2D, partit, mesh) #include "associate_mesh_ass.h" lval=0.0_WP +#if defined(__openmp_reproducible) +!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(row) +!$OMP DO ORDERED +#else !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(row) REDUCTION(+: lval) +#endif do row=1, myDim_elem2D if(elem2D_nodes(1, row) > myDim_nod2D) cycle #if defined(__openmp_reproducible) @@ -718,7 +748,12 @@ subroutine integrate_elem_2D(data, int2D, partit, mesh) !$OMP END ORDERED #endif end do +#if defined(__openmp_reproducible) +!$OMP END DO +!$OMP END PARALLEL +#else !$OMP END PARALLEL DO +#endif int2D=0.0_WP call MPI_AllREDUCE(lval, int2D, 1, MPI_DOUBLE_PRECISION, MPI_SUM, & diff --git a/src/icb_step.F90 b/src/icb_step.F90 index bac9cff66..62521a85d 100644 --- a/src/icb_step.F90 +++ b/src/icb_step.F90 @@ -567,7 +567,7 @@ subroutine iceberg_step1(ice, mesh, partit, dynamics, ib, height_ib_single,lengt case(2) area_ib_tot = length_ib_single*width_ib_single*scaling(ib) end select -!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(idx, area_ib_tot) +!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(idx) REDUCTION(+:area_ib_tot) !$OMP DO do idx = 1, size(elem_block) if (elem_block(idx) == iceberg_elem) then @@ -790,7 +790,7 @@ subroutine iceberg_step2(mesh, partit,arr, elem_from_block, ib, height_ib_single case(2) area_ib_tot = length_ib_single*width_ib_single*scaling(ib) end select -!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(idx, area_ib_tot) +!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(idx) REDUCTION(+:area_ib_tot) !$OMP DO do idx = 1, size(elem_block_red) if (elem_block_red(idx) == iceberg_elem) then diff --git a/src/ice_EVP.F90 b/src/ice_EVP.F90 index 21ed55fe2..beff8f684 100755 --- a/src/ice_EVP.F90 +++ b/src/ice_EVP.F90 @@ -227,7 +227,11 @@ subroutine stress2rhs(ice, partit, mesh) #endif #ifndef ENABLE_OPENACC +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) #if !defined(DISABLE_OPENACC_ATOMICS) @@ -716,7 +720,11 @@ subroutine EVPdynamics(ice, partit, mesh) ! apply sea ice velocity boundary condition #ifndef ENABLE_OPENACC +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else ! With the binary data of np2 goes only inside the first if !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) diff --git a/src/ice_fct.F90 b/src/ice_fct.F90 index ce4564fa6..44ac6a72d 100755 --- a/src/ice_fct.F90 +++ b/src/ice_fct.F90 @@ -759,7 +759,11 @@ subroutine ice_fem_fct(tr_array_id, ice, partit, mesh) #endif #ifndef ENABLE_OPENACC +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else #if !defined(DISABLE_OPENACC_ATOMICS) !$ACC PARALLEL LOOP GANG VECTOR PRIVATE(elnodes) DEFAULT(PRESENT) @@ -901,7 +905,11 @@ subroutine ice_fem_fct(tr_array_id, ice, partit, mesh) #endif #ifndef ENABLE_OPENACC +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else #if !defined(DISABLE_OPENACC_ATOMICS) !$ACC PARALLEL LOOP GANG VECTOR PRIVATE(elnodes) DEFAULT(PRESENT) @@ -959,7 +967,11 @@ subroutine ice_fem_fct(tr_array_id, ice, partit, mesh) end do #ifndef ENABLE_OPENACC !$OMP END DO +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else !$ACC END PARALLEL LOOP #if !defined(DISABLE_OPENACC_ATOMICS) @@ -1018,7 +1030,11 @@ subroutine ice_fem_fct(tr_array_id, ice, partit, mesh) end do #ifndef ENABLE_OPENACC !$OMP END DO +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else !$ACC END PARALLEL LOOP #if !defined(DISABLE_OPENACC_ATOMICS) @@ -1078,7 +1094,11 @@ subroutine ice_fem_fct(tr_array_id, ice, partit, mesh) end do #ifndef ENABLE_OPENACC !$OMP END DO +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else !$ACC END PARALLEL LOOP #endif @@ -1166,8 +1186,12 @@ SUBROUTINE ice_mass_matrix_fill(ice, partit, mesh) mass_matrix => ice%work%fct_massmatrix(:) ! ! a) -!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(n, k, row, elem, elnodes, q, offset, ipos, aa) +!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(n, k, row, elem, elnodes, q, offset, ipos, aa, flag, iflag) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif DO elem=1,myDim_elem2D elnodes=elem2D_nodes(:,elem) @@ -1326,7 +1350,11 @@ subroutine ice_TG_rhs_div(ice, partit, mesh) #ifndef ENABLE_OPENACC !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(diff, entries, um, vm, vol, dx, dy, n, q, row, elem, elnodes, c1, c2, c3, c4, cx1, cx2, cx3, cx4, entries2) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else #if !defined(DISABLE_OPENACC_ATOMICS) !$ACC PARALLEL LOOP GANG VECTOR PRIVATE(elnodes, dx, dy, entries, entries2) DEFAULT(PRESENT) diff --git a/src/ice_maEVP.F90 b/src/ice_maEVP.F90 index 791de938e..7a9ed277a 100644 --- a/src/ice_maEVP.F90 +++ b/src/ice_maEVP.F90 @@ -245,7 +245,11 @@ subroutine ssh2rhs(ice, partit, mesh) !_____________________________________________________________________________ ! use floating sea ice for zlevel and zstar if (use_floatice .and. .not. trim(which_ale)=='linfs') then +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do elem=1,myDim_elem2d elnodes=elem2D_nodes(:,elem) !_______________________________________________________________________ @@ -285,7 +289,11 @@ subroutine ssh2rhs(ice, partit, mesh) end do !$OMP END DO else +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do elem=1,myDim_elem2d elnodes=elem2D_nodes(:,elem) !_______________________________________________________________________ @@ -371,7 +379,11 @@ subroutine stress2rhs_m(ice, partit, mesh) !$OMP END PARALLEL DO !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(elem, elnodes, k, row, dx, dy, vol, mf, aa, bb, mass, cluster_area, elevation_elem) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do elem=1,myDim_elem2d elnodes=elem2D_nodes(:,elem) !_______________________________________________________________________ @@ -545,7 +557,11 @@ subroutine EVPdynamics_m(ice, partit, mesh) ! use floating sea ice for zlevel and zstar !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(el, elnodes, vol, dx, dy, p_ice, n, bb, aa) if (use_floatice .and. .not. trim(which_ale)=='linfs') then +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do el=1,myDim_elem2d elnodes=elem2D_nodes(:,el) @@ -588,7 +604,11 @@ subroutine EVPdynamics_m(ice, partit, mesh) !_____________________________________________________________________________ ! use levitating sea ice for linfs, zlevel and zstar else +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do el=1,myDim_elem2d elnodes=elem2D_nodes(:,el) !_______________________________________________________________________ @@ -685,7 +705,11 @@ subroutine EVPdynamics_m(ice, partit, mesh) ! SD, 30.07.2014 !_______________________________________________________________________ !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(el, i, ed, row, elnodes, dx, dy, meancos, eps1, eps2, delta, pressure, umod, drag, rhsu, rhsv, det, n) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do el=1,myDim_elem2D if (ulevels(el)>1) cycle diff --git a/src/oce_adv_tra_driver.F90 b/src/oce_adv_tra_driver.F90 index 9b9606ea8..07251812f 100644 --- a/src/oce_adv_tra_driver.F90 +++ b/src/oce_adv_tra_driver.F90 @@ -131,7 +131,11 @@ subroutine do_oce_adv_tra(dt, vel, w, wi, we, tr_num, dynamics, tracers, partit, #endif #ifndef ENABLE_OPENACC +#if defined(__openmp_reproducible) +!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(e, enodes, el, nl1, nu1, nl2, nu2, nu12, nl12, nz) ORDERED +#else !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(e, enodes, el, nl1, nu1, nl2, nu2, nu12, nl12, nz) +#endif #else #if !defined(DISABLE_OPENACC_ATOMICS) !$ACC PARALLEL LOOP GANG PRIVATE(enodes, el) DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) @@ -493,7 +497,11 @@ subroutine oce_tra_adv_flux2dtracer(dt, dttf_h, dttf_v, flux_h, flux_v, partit, #endif ! Horizontal #ifndef ENABLE_OPENACC +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else #if !defined(DISABLE_OPENACC_ATOMICS) !$ACC PARALLEL LOOP GANG PRIVATE(enodes, el) DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) diff --git a/src/oce_adv_tra_fct.F90 b/src/oce_adv_tra_fct.F90 index 218622e3c..751bdd0cc 100644 --- a/src/oce_adv_tra_fct.F90 +++ b/src/oce_adv_tra_fct.F90 @@ -267,7 +267,7 @@ subroutine oce_tra_adv_fct(dt, ttf, lo, adf_h, adf_v, fct_ttf_min, fct_ttf_max, #endif !Vertical #ifndef ENABLE_OPENACC -!$OMP DO +!$OMP DO #else !$ACC PARALLEL LOOP GANG DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) #endif @@ -288,7 +288,11 @@ subroutine oce_tra_adv_fct(dt, ttf, lo, adf_h, adf_v, fct_ttf_min, fct_ttf_max, #endif #ifndef ENABLE_OPENACC +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else !Horizontal #if !defined(DISABLE_OPENACC_ATOMICS) diff --git a/src/oce_ale.F90 b/src/oce_ale.F90 index fe0051854..08c470fcf 100644 --- a/src/oce_ale.F90 +++ b/src/oce_ale.F90 @@ -1709,8 +1709,11 @@ subroutine update_stiff_mat_ale(partit, mesh) ! to finite volumes" --> stiff matrix part ! loop over lcal edges factor=g*dt*alpha*theta - +#if defined(__openmp_reproducible) +!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(n, i, j, k, row, ed, n2, enodes, elnodes, el, elem, npos, offset, nini, nend, fx, fy) ORDERED +#else !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(n, i, j, k, row, ed, n2, enodes, elnodes, el, elem, npos, offset, nini, nend, fx, fy) +#endif do ed=1,myDim_edge2D !! Attention ! enodes ... local node indices of nodes that edge ed enodes=edges(:,ed) @@ -1840,7 +1843,11 @@ subroutine compute_ssh_rhs_ale(dynamics, partit, mesh) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(ed, el, enodes, n, nz, nzmin, nzmax, c1, c2, deltaX1, deltaX2, deltaY1, deltaY2, & !$OMP dumc1_1, dumc1_2, dumc2_1, dumc2_2) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do ed=1, myDim_edge2D ! local indice of nodes that span up edge ed enodes=edges(:,ed) @@ -1992,7 +1999,11 @@ subroutine compute_hbar_ale(dynamics, partit, mesh) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(ed, el, enodes, elem, elnodes, n, nz, nzmin, nzmax, & !$OMP c1, c2, deltaX1, deltaX2, deltaY1, deltaY2) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do ed=1, myDim_edge2D ! local indice of nodes that span up edge ed enodes=edges(:,ed) @@ -2171,7 +2182,11 @@ subroutine vert_vel_ale(dynamics, partit, mesh) !$OMP END PARALLEL DO !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(ed, enodes, el, deltaX1, deltaY1, nz, nzmin, nzmax, deltaX2, deltaY2, c1, c2) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do ed=1, myDim_edge2D ! local indice of nodes that span up edge ed enodes=edges(:,ed) @@ -2725,7 +2740,11 @@ subroutine compute_vert_vel_transpv(dynamics, partit, mesh) !___________________________________________________________________________ !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(ed, ednodes, edelem, nz, nzmin, nzmax, & !$OMP deltaX1, deltaY1, deltaX2, deltaY2, c1, c2) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do ed=1, myDim_edge2D ! local indice of nodes that span up edge ed ednodes=edges(:,ed) diff --git a/src/oce_ale_pressure_bv.F90 b/src/oce_ale_pressure_bv.F90 index 2dbb2aa72..281eee270 100644 --- a/src/oce_ale_pressure_bv.F90 +++ b/src/oce_ale_pressure_bv.F90 @@ -2973,7 +2973,7 @@ subroutine compute_neutral_slope(partit, mesh) eps=5.0e-6_WP !PS S_cr=1.0e-2_WP !PS S_d=1.0e-3_WP -!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(edge, deltaX1, deltaY1, deltaX2, deltaY2, n, nz, nl1, ul1, el, elnodes, enodes, c, ro_z_inv, f_min, dep_scale, rssby, c1, c2) +!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(edge, deltaX1, deltaY1, deltaX2, deltaY2, n, nz, nl1, ul1, el, elnodes, enodes, c, ro_z_inv, dep_scale, rssby, c1, c2) FIRSTPRIVATE(f_min) !$OMP DO do n=1, myDim_nod2D slope_tapered(: , :, n)=0._WP diff --git a/src/oce_ale_ssh_splitexpl_subcycl.F90 b/src/oce_ale_ssh_splitexpl_subcycl.F90 index e1069027f..1441a8317 100644 --- a/src/oce_ale_ssh_splitexpl_subcycl.F90 +++ b/src/oce_ale_ssh_splitexpl_subcycl.F90 @@ -110,7 +110,11 @@ subroutine momentum_adv_scalar_transpv(dynamics, partit, mesh) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(node, elem, ed, nz, nl1, ul1, nl2, ul2, nl12, ul12, & !$OMP uv1, uv2, uv12, qc, qu, qd, wu, wv, & !$OMP ednodes, edelem, un1, un2) -!$OMP DO +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else +!$OMP DO +#endif do node=1, myDim_nod2D ul1 = ulevels_nod2D(node) nl1 = nlevels_nod2D(node) @@ -179,7 +183,11 @@ subroutine momentum_adv_scalar_transpv(dynamics, partit, mesh) !___________________________________________________________________________ ! 2nd. compute horizontal advection component: u*du/dx, u*dv/dx & v*du/dy, v*dv/dy ! loop over triangle edges -!$OMP DO +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else +!$OMP DO +#endif do ed=1, myDim_edge2D ! local indice of nodes that span up edge ed ednodes = edges(:,ed) @@ -199,6 +207,13 @@ subroutine momentum_adv_scalar_transpv(dynamics, partit, mesh) un1(ul1:nl1) = ( UVh(2, ul1:nl1, edelem(1))*edge_cross_dxdy(1,ed) & - UVh(1, ul1:nl1, edelem(1))*edge_cross_dxdy(2,ed)) + !___________________________________________________________________ + ! ensure openmp numerical reproducability + ! block had to be moved outside of if statement, otherwise the else + ! part was missing the !$OMP ORDERED +#if defined(__openmp_reproducible) +!$OMP ORDERED +#endif !_______________________________________________________________________ ! if edelem(2)==0 than edge is boundary edge if(edelem(2)>0) then @@ -221,11 +236,6 @@ subroutine momentum_adv_scalar_transpv(dynamics, partit, mesh) ul12 = max(ul1, ul2) nl12 = min(nl1, nl2) - !___________________________________________________________________ - ! ensure openmp numerical reproducability -#if defined(__openmp_reproducible) -!$OMP ORDERED -#endif !___________________________________________________________________ !NR add contribution to first edge node --> ednodes(1) !NR Do not calculate on Halo nodes, as the result will not be used. @@ -303,7 +313,7 @@ subroutine momentum_adv_scalar_transpv(dynamics, partit, mesh) !_______________________________________________________________________ ! if edelem(2)==0 than edge is boundary edge - else ! --> if(edelem(2)>0) then + else !___________________________________________________________________ !NR add contribution to first edge node --> ednodes(1) !NR Do not calculate on Halo nodes, as the result will not be used. @@ -338,13 +348,13 @@ subroutine momentum_adv_scalar_transpv(dynamics, partit, mesh) call omp_unset_lock(partit%plock(ednodes(2))) #endif end if - end if ! --> if(edelem(2)>0) then + end if #if defined(__openmp_reproducible) !$OMP END ORDERED #endif - end do ! --> do ed=1, myDim_edge2D + end do !$OMP END DO !___________________________________________________________________________ @@ -847,7 +857,11 @@ subroutine compute_BT_step_SE_ale(dynamics, partit, mesh) ! remove viscosity !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(edge, edelem, ednodes, hh, len, & !$OMP vi, update_ubt, update_vbt) -!$OMP DO +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else +!$OMP DO +#endif do edge=1, myDim_edge2D+eDim_edge2D ! if ed is an outer boundary edge, skip it @@ -902,7 +916,11 @@ subroutine compute_BT_step_SE_ale(dynamics, partit, mesh) ! remove bottom drag if (dynamics%se_bottdrag) then !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(elem, elnodes, nzmax, hh) -!$OMP DO +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else +!$OMP DO +#endif do elem=1, myDim_elem2D elnodes= elem2D_nodes(:,elem) nzmax = nlevels(elem) @@ -952,7 +970,11 @@ subroutine compute_BT_step_SE_ale(dynamics, partit, mesh) !___________________________________________________________________ !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(edge, edelem, ednodes, hh, len, & !$OMP vi, update_ubt, update_vbt) -!$OMP DO +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else +!$OMP DO +#endif do edge=1, myDim_edge2D+eDim_edge2D ! if ed is an outer boundary edge, skip it @@ -1138,7 +1160,11 @@ subroutine compute_BT_step_SE_ale(dynamics, partit, mesh) ! and advance ssh --> eta^(n+(m+1)/M) = eta^(n+(m)/M) - dt/M * div_H * [...] !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(edge, ednodes, edelem, c1, c2, & !$OMP deltaX1, deltaX2, deltaY1, deltaY2) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do edge=1, myDim_edge2D ednodes = edges(:,edge) edelem = edge_tri(:,edge) diff --git a/src/oce_ale_tracer.F90 b/src/oce_ale_tracer.F90 index 9c6677944..5c233b7d9 100644 --- a/src/oce_ale_tracer.F90 +++ b/src/oce_ale_tracer.F90 @@ -1198,7 +1198,11 @@ subroutine diff_part_hor_redi(tracers, partit, mesh) !$OMP nl1, ul1, nl2, ul2, nl12, ul12, nz, el, elnodes, enodes, & !$OMP c, Fx, Fy, Tx, Ty, Tx_z, Ty_z, SxTz, SyTz, Tz, & !$OMP rhs1, rhs2, Kh, dz) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do edge=1, myDim_edge2D rhs1=0.0_WP rhs2=0.0_WP @@ -1371,7 +1375,11 @@ SUBROUTINE diff_part_bh(tr_num, dynamics, tracers, partit, mesh) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(n, nz, ed, el, en, k, elem, nzmin, nzmax, u1, v1, len, vi, tt, ww, & !$OMP elnodes1, elnodes2) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif DO ed=1, myDim_edge2D!+eDim_edge2D if (myList_edge2D(ed) > edge2D_in) cycle el=edge_tri(:,ed) @@ -1417,7 +1425,11 @@ SUBROUTINE diff_part_bh(tr_num, dynamics, tracers, partit, mesh) ! =========== ! Second round: ! =========== +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif DO ed=1, myDim_edge2D!+eDim_edge2D if (myList_edge2D(ed)>edge2D_in) cycle el=edge_tri(:,ed) diff --git a/src/oce_ale_vel_rhs.F90 b/src/oce_ale_vel_rhs.F90 index 2cf0ab2b0..2f19e7959 100644 --- a/src/oce_ale_vel_rhs.F90 +++ b/src/oce_ale_vel_rhs.F90 @@ -423,7 +423,11 @@ subroutine momentum_adv_scalar(dynamics, partit, mesh) !___________________________________________________________________________ ! 2nd. compute horizontal advection component: u*du/dx, u*dv/dx & v*du/dy, v*dv/dy ! loop over triangle edges +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do ed=1, myDim_edge2D nod = edges(:,ed) el1 = edge_tri(1,ed) @@ -454,6 +458,9 @@ subroutine momentum_adv_scalar(dynamics, partit, mesh) ! compute horizontal normal velocity with respect to the edge from triangle ! centroid towards triangel edge mid-pointe for element el2 when it is valid ! --> if its a boundary triangle el2 will be not valid +#if defined(__openmp_reproducible) +!$OMP ORDERED +#endif if (el2>0) then ! --> el2 is valid element nl2 = nlevels(el2)-1 ul2 = ulevels(el2) @@ -469,10 +476,6 @@ subroutine momentum_adv_scalar(dynamics, partit, mesh) un1(1:ul1-1) = 0._WP un2(1:ul2-1) = 0._WP -#if defined(__openmp_reproducible) -!$OMP ORDERED -#endif - ! first edge node ! Do not calculate on Halo nodes, as the result will not be used. ! The "if" is cheaper than the avoided computiations. diff --git a/src/oce_dyn.F90 b/src/oce_dyn.F90 index a265851bf..d995bc087 100755 --- a/src/oce_dyn.F90 +++ b/src/oce_dyn.F90 @@ -323,7 +323,11 @@ SUBROUTINE visc_filt_bcksct(dynamics, partit, mesh) !___________________________________________________________________________ !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(u1, v1, len, vi, nz, ed, el, nelem, k, elem, nzmin, nzmax, update_u, update_v) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif DO ed=1, myDim_edge2D+eDim_edge2D if(myList_edge2D(ed)>edge2D_in) cycle el=edge_tri(:,ed) @@ -473,7 +477,11 @@ SUBROUTINE visc_filt_bilapl(dynamics, partit, mesh) !___________________________________________________________________________ ! Sum up velocity differences over edge with respect to elemtnal index !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(u1, v1, len, vi, ed, el, nz, nzmin, nzmax) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif DO ed=1, myDim_edge2D+eDim_edge2D if(myList_edge2D(ed)>edge2D_in) cycle el = edge_tri(:,ed) @@ -533,7 +541,11 @@ SUBROUTINE visc_filt_bilapl(dynamics, partit, mesh) !$OMP BARRIER !___________________________________________________________________________ +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif DO ed=1, myDim_edge2D+eDim_edge2D if(myList_edge2D(ed)>edge2D_in) cycle el=edge_tri(:,ed) @@ -627,7 +639,11 @@ SUBROUTINE visc_filt_bidiff(dynamics, partit, mesh) !___________________________________________________________________________ !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(u1, v1, len, vi, ed, el, nz, nzmin, nzmax, update_u, update_v) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif DO ed=1, myDim_edge2D+eDim_edge2D if(myList_edge2D(ed)>edge2D_in) cycle el=edge_tri(:,ed) @@ -675,7 +691,11 @@ SUBROUTINE visc_filt_bidiff(dynamics, partit, mesh) !$OMP BARRIER !___________________________________________________________________________ +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif DO ed=1, myDim_edge2D+eDim_edge2D if(myList_edge2D(ed)>edge2D_in) cycle el=edge_tri(:,ed) diff --git a/src/oce_mesh.F90 b/src/oce_mesh.F90 index c61ae3374..ee21d83af 100755 --- a/src/oce_mesh.F90 +++ b/src/oce_mesh.F90 @@ -2510,6 +2510,7 @@ SUBROUTINE mesh_auxiliary_arrays(partit, mesh) center_x(n)=ax center_y(n)=ay mesh%elem_cos(n)=cos(ay) + if (metric_factor_zero) then mesh%metric_factor(n)=0.0_WP else diff --git a/src/oce_muscl_adv.F90 b/src/oce_muscl_adv.F90 index cb3c0638a..997a51560 100755 --- a/src/oce_muscl_adv.F90 +++ b/src/oce_muscl_adv.F90 @@ -92,7 +92,11 @@ subroutine muscl_adv_init(twork, partit, mesh) allocate(twork%nboundary_lay(myDim_nod2D+eDim_nod2D)) !node n becomes a boundary node after layer twork%nboundary_lay(n) twork%nboundary_lay=nl-1 !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(n, k, n1, n2) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do n=1, myDim_edge2D ! n1 and n2 are local indices n1=edges(1,n) diff --git a/src/solver.F90 b/src/solver.F90 index 2a1a95af1..3554513f9 100644 --- a/src/solver.F90 +++ b/src/solver.F90 @@ -252,7 +252,7 @@ subroutine ssh_solve_cg(x, rhs, solverinfo, partit, mesh) !$OMP END PARALLEL DO #else sprod(1) = sum(rr(1:myDim_nod2D) * zz(1:myDim_nod2D)) - sprod(1) = sum(rr(1:myDim_nod2D) * rr(1:myDim_nod2D)) + sprod(2) = sum(rr(1:myDim_nod2D) * rr(1:myDim_nod2D)) #endif call MPI_Allreduce(MPI_IN_PLACE, sprod, 2, MPI_DOUBLE, MPI_SUM, partit%MPI_COMM_FESOM, MPIerr) diff --git a/src/write_step_info.F90 b/src/write_step_info.F90 index c43f7c631..9ad384bf5 100644 --- a/src/write_step_info.F90 +++ b/src/write_step_info.F90 @@ -114,7 +114,7 @@ subroutine write_step_info(istep, outfreq, ice, dynamics, tracers, partit, mesh) !_______________________________________________________________________ #if !defined(__openmp_reproducible) -!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(n) REDUCTION(+:loc_eta, loc_hbar, loc_deta, loc_dhbar, loc_wflux) +!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(n) REDUCTION(+:loc_eta, loc_hbar, loc_dhbar, loc_wflux) #endif do n=1, myDim_nod2D loc_eta = loc_eta + areasvol(ulevels_nod2D(n), n)*eta_n(n)