diff --git a/lib/hipfort/hipfort_hipmalloc.F90 b/lib/hipfort/hipfort_hipmalloc.F90 index c1423a31..9ee3d20b 100644 --- a/lib/hipfort/hipfort_hipmalloc.F90 +++ b/lib/hipfort/hipfort_hipmalloc.F90 @@ -35,12 +35,14 @@ module hipfort_hipmalloc private :: byte_size +#ifdef USE_FPOINTER_INTERFACES interface byte_size module procedure byte_size_int32, & byte_size_int64, & byte_size_real32, & byte_size_real64 endinterface +#endif interface hipMalloc !> diff --git a/lib/hipfort/hipfort_rocsolver.F90 b/lib/hipfort/hipfort_rocsolver.F90 index 348b9338..8e59badd 100644 --- a/lib/hipfort/hipfort_rocsolver.F90 +++ b/lib/hipfort/hipfort_rocsolver.F90 @@ -23979,6 +23979,187 @@ function rocsolver_zhegvd_strided_batched_(handle,itype,evect,uplo,n,A,lda,strid rocsolver_zhegvd_strided_batched_full_rank,& rocsolver_zhegvd_strided_batched_rank_0,& rocsolver_zhegvd_strided_batched_rank_1 +#endif + end interface + !> \brief + !> HEGVDX computes a set of the eigenvalues and optionally the corresponding + !> eigenvectors of a complex generalized Hermitian-definite eigenproblem. + !> + !> \details + !> Computes a set of the eigenvalues and optionally the corresponding + !> eigenvectors of a complex generalized Hermitian-definite eigenproblem. + !> The eignvectors are computed using a divide and conquer algorithm. + !> The generalized eigenproblem solved is one of the following, depending on + !> the value of @c itype: + !> - itype = rocblas_eform_1 : A * x = lambda * B * x + !> - itype = rocblas_eform_2 : A * B * x = lambda * x + !> - itype = rocblas_eform_3 : B * A * x = lambda * x + !> + !> When eigenvectors are computed, the matrix Z is normalized such that + !> Z^H * B * Z = I, i.e. the eigenvectors are B-orthonormal. + !> + !> Depending on @c erange, the routine computes: + !> - all eigenvalues, + !> - eigenvalues in the half-open interval (vl, vu], or + !> - the il-th through iu-th eigenvalues (by index). + !> + !> If @c evect = rocblas_evect_original, the eigenvectors corresponding to + !> the selected eigenvalues are computed as well. + !> + !> @param[in] handle + !> rocblas_handle. The GPU library context. + !> + !> @param[in] itype + !> rocblas_eform. Specifies the form of the generalized eigenproblem. + !> + !> @param[in] evect + !> rocblas_evect. Specifies whether eigenvectors are computed. + !> - rocblas_evect_none : eigenvalues only. + !> - rocblas_evect_original : eigenvalues and eigenvectors. + !> Note: rocblas_evect_tridiagonal is not supported. + !> + !> @param[in] erange + !> rocblas_erange. Specifies which eigenvalues to compute: + !> - rocblas_erange_all + !> - rocblas_erange_value + !> - rocblas_erange_index + !> + !> @param[in] uplo + !> rocblas_fill. Indicates whether the upper or lower part of A and B + !> is stored. The opposite part is not referenced. + !> + !> @param[in] n + !> rocblas_int. Matrix order. n >= 0. + !> + !> @param[in,out] A + !> Complex array on the GPU of dimension (lda,n). + !> On entry: Hermitian matrix A. + !> On exit: destroyed. + !> + !> @param[in] lda + !> rocblas_int. Leading dimension of A. lda >= n. + !> + !> @param[in,out] B + !> Complex array on the GPU of dimension (ldb,n). + !> On entry: Hermitian positive definite matrix B. + !> On exit: triangular factor from POTRF. + !> + !> @param[in] ldb + !> rocblas_int. Leading dimension of B. ldb >= n. + !> + !> @param[in] vl + !> real type. Lower bound of interval (vl, vu]. + !> Ignored unless erange = rocblas_erange_value. + !> + !> @param[in] vu + !> real type. Upper bound of interval (vl, vu]. + !> Ignored unless erange = rocblas_erange_value. + !> + !> @param[in] il + !> rocblas_int. Index of smallest eigenvalue to be computed. + !> Ignored unless erange = rocblas_erange_index. + !> + !> @param[in] iu + !> rocblas_int. Index of largest eigenvalue to be computed. + !> Ignored unless erange = rocblas_erange_index. + !> + !> @param[out] nev + !> rocblas_int (device). Number of eigenvalues found. + !> + !> @param[out] W + !> real array on the GPU of dimension n. + !> First nev elements contain eigenvalues. + !> + !> @param[out] Z + !> Complex array on the GPU of dimension (ldz,nev). + !> If evect /= rocblas_evect_none and info = 0, contains the eigenvectors. + !> + !> @param[in] ldz + !> rocblas_int. Leading dimension of Z. ldz >= n. + !> + !> @param[out] info + !> rocblas_int (device). + !> - 0: successful exit + !> - i (1 <= i <= n): i columns of Z failed to converge + !> - n + i: leading minor of order i of B not positive definite + !> + !> @note + !> - All matrices and arrays are GPU device memory. + !> - Eigenvectors (if requested) are B-orthonormal: Z^H * B * Z = I. + !> - Divide-and-conquer algorithm is used for eigenvectors. + !> - When erange = rocblas_erange_value, nev is not known in advance, + !> allocate Z with n columns. + interface rocsolver_chegvdx + function rocsolver_chegvdx_(handle, itype, evect, erange, uplo, n, A, lda, B, ldb, & + vl, vu, il, iu, nev, W, Z, ldz, info) bind(C, name="rocsolver_chegvdx") + use iso_c_binding + use hipfort_rocblas_enums + use hipfort_rocsolver_enums + implicit none + integer(kind(rocblas_status_success)) :: rocsolver_chegvdx_ + type(c_ptr), value :: handle + integer(kind(rocblas_eform_ax)), value :: itype + integer(kind(rocblas_evect_original)), value :: evect + integer(kind(rocblas_erange_all)), value :: erange + integer(kind(rocblas_fill_upper)), value :: uplo + integer(c_int), value :: n + type(c_ptr), value :: A + integer(c_int), value :: lda + type(c_ptr), value :: B + integer(c_int), value :: ldb + real(c_float), value :: vl + real(c_float), value :: vu + integer(c_int), value :: il + integer(c_int), value :: iu + integer(c_int), intent(out) :: nev + type(c_ptr), value :: W + type(c_ptr), value :: Z + integer(c_int), value :: ldz + integer(c_int), intent(out) :: info + end function rocsolver_chegvdx_ + +#ifdef USE_FPOINTER_INTERFACES + module procedure & + rocsolver_chegvdx_full_rank,& + rocsolver_chegvdx_rank_0,& + rocsolver_chegvdx_rank_1 +#endif + end interface + + interface rocsolver_zhegvdx + function rocsolver_zhegvdx_(handle, itype, evect, erange, uplo, n, A, lda, B, ldb, & + vl, vu, il, iu, nev, W, Z, ldz, info) bind(C, name="rocsolver_zhegvdx") + use iso_c_binding + use hipfort_rocblas_enums + use hipfort_rocsolver_enums + implicit none + integer(kind(rocblas_status_success)) :: rocsolver_zhegvdx_ + type(c_ptr), value :: handle + integer(kind(rocblas_eform_ax)), value :: itype + integer(kind(rocblas_evect_original)), value :: evect + integer(kind(rocblas_erange_all)), value :: erange + integer(kind(rocblas_fill_upper)), value :: uplo + integer(c_int), value :: n + type(c_ptr), value :: A + integer(c_int), value :: lda + type(c_ptr), value :: B + integer(c_int), value :: ldb + real(c_double), value :: vl + real(c_double), value :: vu + integer(c_int), value :: il + integer(c_int), value :: iu + integer(c_int), intent(out) :: nev + type(c_ptr), value :: W + type(c_ptr), value :: Z + integer(c_int), value :: ldz + integer(c_int), intent(out) :: info + end function rocsolver_zhegvdx_ + +#ifdef USE_FPOINTER_INTERFACES + module procedure & + rocsolver_zhegvdx_full_rank,& + rocsolver_zhegvdx_rank_0,& + rocsolver_zhegvdx_rank_1 #endif end interface !> \brief GETRI_OUTOFPLACE computes the inverse \f$C = A^{-1}\f$ of a general n-by-n matrix A. @@ -59506,6 +59687,197 @@ function rocsolver_zsytrf_strided_batched_rank_1(handle,uplo,n,A,lda,strideA,ipi rocsolver_zsytrf_strided_batched_rank_1 = rocsolver_zsytrf_strided_batched_(handle,uplo,n,c_loc(A),lda,strideA,c_loc(ipiv),strideP,myInfo,batch_count) end function + function rocsolver_chegvdx_full_rank(handle, itype, evect, erange, uplo, n, A, lda, B, ldb, & + vl, vu, il, iu, nev, W, Z, ldz, info) + use iso_c_binding + use hipfort_rocblas_enums + use hipfort_rocsolver_enums + implicit none + integer(kind(rocblas_status_success)) :: rocsolver_chegvdx_full_rank + type(c_ptr), value :: handle + integer(kind(rocblas_eform_ax)), value :: itype + integer(kind(rocblas_evect_original)), value :: evect + integer(kind(rocblas_erange_all)), value :: erange + integer(kind(rocblas_fill_upper)), value :: uplo + integer(c_int), value :: n + complex(c_float_complex), target, dimension(:,:) :: A + integer(c_int), value :: lda + complex(c_float_complex), target, dimension(:,:) :: B + integer(c_int), value :: ldb + real(c_float), value :: vl + real(c_float), value :: vu + integer(c_int), value :: il + integer(c_int), value :: iu + integer(c_int), intent(out) :: nev + real(c_float), target, dimension(:) :: W + complex(c_float_complex), target, dimension(:,:) :: Z + integer(c_int), value :: ldz + integer(c_int), intent(out) :: info + + rocsolver_chegvdx_full_rank = rocsolver_chegvdx_(handle, itype, evect, erange, uplo, n, c_loc(A), lda, c_loc(B), ldb, & + vl, vu, il, iu, nev, c_loc(W), c_loc(Z), ldz, info) + + end function rocsolver_chegvdx_full_rank + + function rocsolver_chegvdx_rank_1(handle, itype, evect, erange, uplo, n, A, lda, B, ldb, & + vl, vu, il, iu, nev, W, Z, ldz, info) + use iso_c_binding + use hipfort_rocblas_enums + use hipfort_rocsolver_enums + implicit none + integer(kind(rocblas_status_success)) :: rocsolver_chegvdx_rank_1 + type(c_ptr), value :: handle + integer(kind(rocblas_eform_ax)), value :: itype + integer(kind(rocblas_evect_original)), value :: evect + integer(kind(rocblas_erange_all)), value :: erange + integer(kind(rocblas_fill_upper)), value :: uplo + integer(c_int), value :: n + complex(c_float_complex), target, dimension(:) :: A + integer(c_int), value :: lda + complex(c_float_complex), target, dimension(:) :: B + integer(c_int), value :: ldb + real(c_float), value :: vl + real(c_float), value :: vu + integer(c_int), value :: il + integer(c_int), value :: iu + integer(c_int), intent(out) :: nev + real(c_float), target, dimension(:) :: W + complex(c_float_complex), target, dimension(:) :: Z + integer(c_int), value :: ldz + integer(c_int), intent(out) :: info + + rocsolver_chegvdx_rank_1 = rocsolver_chegvdx_(handle, itype, evect, erange, uplo, n, c_loc(A), lda, c_loc(B), ldb, & + vl, vu, il, iu, nev, c_loc(W), c_loc(Z), ldz, info) + + end function rocsolver_chegvdx_rank_1 + + function rocsolver_chegvdx_rank_0(handle, itype, evect, erange, uplo, n, A, lda, B, ldb, & + vl, vu, il, iu, nev, W, Z, ldz, info) + use iso_c_binding + use hipfort_rocblas_enums + use hipfort_rocsolver_enums + implicit none + integer(kind(rocblas_status_success)) :: rocsolver_chegvdx_rank_0 + type(c_ptr), value :: handle + integer(kind(rocblas_eform_ax)), value :: itype + integer(kind(rocblas_evect_original)), value :: evect + integer(kind(rocblas_erange_all)), value :: erange + integer(kind(rocblas_fill_upper)), value :: uplo + integer(c_int), value :: n + complex(c_float_complex), target :: A + integer(c_int), value :: lda + complex(c_float_complex), target :: B + integer(c_int), value :: ldb + real(c_float), value :: vl + real(c_float), value :: vu + integer(c_int), value :: il + integer(c_int), value :: iu + integer(c_int), intent(out) :: nev + real(c_float), target :: W + complex(c_float_complex), target :: Z + integer(c_int), value :: ldz + integer(c_int), intent(out) :: info + + rocsolver_chegvdx_rank_0 = rocsolver_chegvdx_(handle, itype, evect, erange, uplo, n, c_loc(A), lda, c_loc(B), ldb, & + vl, vu, il, iu, nev, c_loc(W), c_loc(Z), ldz, info) + + end function rocsolver_chegvdx_rank_0 + + function rocsolver_zhegvdx_full_rank(handle, itype, evect, erange, uplo, n, A, lda, B, ldb, & + vl, vu, il, iu, nev, W, Z, ldz, info) + use iso_c_binding + use hipfort_rocblas_enums + use hipfort_rocsolver_enums + implicit none + integer(kind(rocblas_status_success)) :: rocsolver_zhegvdx_full_rank + type(c_ptr), value :: handle + integer(kind(rocblas_eform_ax)), value :: itype + integer(kind(rocblas_evect_original)), value :: evect + integer(kind(rocblas_erange_all)), value :: erange + integer(kind(rocblas_fill_upper)), value :: uplo + integer(c_int), value :: n + complex(c_double_complex), target, dimension(:,:) :: A + integer(c_int), value :: lda + complex(c_double_complex), target, dimension(:,:) :: B + integer(c_int), value :: ldb + real(c_double), value :: vl + real(c_double), value :: vu + integer(c_int), value :: il + integer(c_int), value :: iu + integer(c_int), intent(out) :: nev + real(c_double), target, dimension(:) :: W + complex(c_double_complex), target, dimension(:,:) :: Z + integer(c_int), value :: ldz + integer(c_int), intent(out) :: info + + rocsolver_zhegvdx_full_rank = rocsolver_zhegvdx_(handle, itype, evect, erange, uplo, n, c_loc(A), lda, c_loc(B), ldb, & + vl, vu, il, iu, nev, c_loc(W), c_loc(Z), ldz, info) + + end function rocsolver_zhegvdx_full_rank + + function rocsolver_zhegvdx_rank_1(handle, itype, evect, erange, uplo, n, A, lda, B, ldb, & + vl, vu, il, iu, nev, W, Z, ldz, info) + use iso_c_binding + use hipfort_rocblas_enums + use hipfort_rocsolver_enums + implicit none + integer(kind(rocblas_status_success)) :: rocsolver_zhegvdx_rank_1 + type(c_ptr), value :: handle + integer(kind(rocblas_eform_ax)), value :: itype + integer(kind(rocblas_evect_original)), value :: evect + integer(kind(rocblas_erange_all)), value :: erange + integer(kind(rocblas_fill_upper)), value :: uplo + integer(c_int), value :: n + complex(c_double_complex), target, dimension(:) :: A + integer(c_int), value :: lda + complex(c_double_complex), target, dimension(:) :: B + integer(c_int), value :: ldb + real(c_double), value :: vl + real(c_double), value :: vu + integer(c_int), value :: il + integer(c_int), value :: iu + integer(c_int), intent(out) :: nev + real(c_double), target, dimension(:) :: W + complex(c_double_complex), target, dimension(:) :: Z + integer(c_int), value :: ldz + integer(c_int), intent(out) :: info + + rocsolver_zhegvdx_rank_1 = rocsolver_zhegvdx_(handle, itype, evect, erange, uplo, n, c_loc(A), lda, c_loc(B), ldb, & + vl, vu, il, iu, nev, c_loc(W), c_loc(Z), ldz, info) + + end function rocsolver_zhegvdx_rank_1 + + function rocsolver_zhegvdx_rank_0(handle, itype, evect, erange, uplo, n, A, lda, B, ldb, & + vl, vu, il, iu, nev, W, Z, ldz, info) + use iso_c_binding + use hipfort_rocblas_enums + use hipfort_rocsolver_enums + implicit none + integer(kind(rocblas_status_success)) :: rocsolver_zhegvdx_rank_0 + type(c_ptr), value :: handle + integer(kind(rocblas_eform_ax)), value :: itype + integer(kind(rocblas_evect_original)), value :: evect + integer(kind(rocblas_erange_all)), value :: erange + integer(kind(rocblas_fill_upper)), value :: uplo + integer(c_int), value :: n + complex(c_double_complex), target :: A + integer(c_int), value :: lda + complex(c_double_complex), target :: B + integer(c_int), value :: ldb + real(c_double), value :: vl + real(c_double), value :: vu + integer(c_int), value :: il + integer(c_int), value :: iu + integer(c_int), intent(out) :: nev + real(c_double), target :: W + complex(c_double_complex), target :: Z + integer(c_int), value :: ldz + integer(c_int), intent(out) :: info + + rocsolver_zhegvdx_rank_0 = rocsolver_zhegvdx_(handle, itype, evect, erange, uplo, n, c_loc(A), lda, c_loc(B), ldb, & + vl, vu, il, iu, nev, c_loc(W), c_loc(Z), ldz, info) + + end function rocsolver_zhegvdx_rank_0 #endif end module hipfort_rocsolver diff --git a/lib/hipfort/hipfort_rocsolver_enums.F90 b/lib/hipfort/hipfort_rocsolver_enums.F90 index f18afc5e..3703e5af 100644 --- a/lib/hipfort/hipfort_rocsolver_enums.F90 +++ b/lib/hipfort/hipfort_rocsolver_enums.F90 @@ -66,10 +66,14 @@ module hipfort_rocsolver_enums enumerator :: rocblas_eform_bax = 223 end enum - + enum, bind(c) + enumerator :: rocblas_erange_all = 231 + enumerator :: rocblas_erange_value = 232 + enumerator :: rocblas_erange_index = 233 + end enum #ifdef USE_FPOINTER_INTERFACES #endif -end module hipfort_rocsolver_enums \ No newline at end of file +end module hipfort_rocsolver_enums diff --git a/test/README.openmp b/test/README.openmp new file mode 100644 index 00000000..6181ab3b --- /dev/null +++ b/test/README.openmp @@ -0,0 +1,2 @@ +openmp contains tests that combine hipfort with OpenMP offload. Those are intended to be used with amdflang or any other compiler +using. Note that they contain a different build mechanism that the other as one needs to specify the architecture. diff --git a/test/openmp/CMakeLists.txt b/test/openmp/CMakeLists.txt new file mode 100644 index 00000000..5f58a8aa --- /dev/null +++ b/test/openmp/CMakeLists.txt @@ -0,0 +1,213 @@ +# HIPFORT using OpenMP offload with amdflang to AMD GPUs + +cmake_minimum_required(VERSION 3.25) +project(hipfort_openmp LANGUAGES Fortran C CXX) + +##### Compiler ############## +# This file is intended to be used with amdflang +set(CMAKE_Fortran_COMPILER "amdflang" CACHE FILEPATH "Fortran compiler (set full path to amdflang if needed)") + +if(NOT CMAKE_Fortran_COMPILER_ID MATCHES "LLVMFlang") + message(FATAL_ERROR "This project is intended to be used with amdflang or equivalent") +endif() + +set(CMAKE_Fortran_FLAGS "-O3 -g -cpp -fno-fast-math") + +######## OpenMP offload ########## + +# GPU target (amdgcn arch), default to a recent AMD GPU arch; user may override +set(GPU_ARCH "None" CACHE STRING "AMD GPU architecture for OpenMP/HIP offload (e.g. gfx906,gfx90a)") + +if (GPU_ARCH MATCHES "None") + message(FATAL_ERROR "Please provide the GPU_ARCH (e.g. gfx906,gfx90a)") +endif() + +# Unified shared memory +option(USM "Turn on if your GPU and CPU share physical memory" OFF) + +# Find OpenMP +find_package(OpenMP REQUIRED) +# Create a Fortran source file for testing OpenMP offload capabilities +file(WRITE ${CMAKE_BINARY_DIR}/config_tests/test_omp_offload.f90 " +program main + + use iso_fortran_env, only: i32 => int32, r64 => real64 + use iso_c_binding + use omp_lib + + implicit none + + integer(i32), parameter :: n = 4000_i32 + complex(r64), parameter :: zzero = cmplx(0.0, 0.0, kind=r64) + complex(r64), parameter :: zone = cmplx(1.0, 0.0, kind=r64) + + complex(r64), pointer, contiguous :: A(:,:) + type(c_ptr) :: A_cptr + + complex(r64), allocatable :: B(:,:) + complex(r64), allocatable :: C(:,:) + integer(i32) :: i, j + real(r64) :: start, finish + + call cpu_time(start) + + ! Imagine B is given by an external routine in the CPU + allocate(B(n,n), source=cmplx(42.0, 42.0, kind=r64)) + !$omp target enter data map(to: B) + + allocate(C(n,n)) + !$omp target enter data map(alloc: C) + + A_cptr = omp_target_alloc(2 * c_sizeof(1.0_c_double) * n * n, omp_get_default_device()) + call c_f_pointer(A_cptr, A, int([n,n],kind=c_size_t)) + A(0:n-1,0:n-1) => A(:,:) + + !$omp target has_device_addr(A) + !$omp teams distribute parallel do simd private(i, j) collapse(2) + do i = 0, n-1 + do j = 0, n-1 + A(j,i) = fillA(i,j) + 1.0 + C(j,i) = A(j,i) + B(j,i) + end do + end do + !$omp end teams distribute parallel do simd + !$omp end target + + nullify(A) + call omp_target_free(A_cptr, omp_get_default_device()) + !$omp target update from(C) + !$omp target exit data map(delete: C) + !$omp target exit data map(delete: B) + + call cpu_time(finish) + + write(*,*) A(1,1) + print '(\"Time = \", f16.3, \" seconds.\")', finish - start + +contains + + pure complex(r64) function fillA(i,j) + + integer(i32), intent(in) :: i, j + !$omp declare target(fillA) + + fillA = cmplx(0, 0, r64) + + end function fillA + +end program main") + +# Create a log file for the config test +set(OMPOFFLOAD_CONFIG_TEST_LOG "${CMAKE_BINARY_DIR}/config_tests/ompoffload_test_log.txt") +file(WRITE "${OMPOFFLOAD_CONFIG_TEST_LOG}" "Compilation Log:\n") +set(OMPOFFLOAD_CONFIG_TEST_LOG_STR "") + +# Try to compile and run the Fortran test program during configuration. +execute_process( + COMMAND ${CMAKE_Fortran_COMPILER} ${OpenMP_Fortran_FLAGS} -o test_omp_offload test_omp_offload.f90 + WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/config_tests + RESULT_VARIABLE OMPOFFLOAD_TEST_COMPILE_RESULT + OUTPUT_VARIABLE OMPOFFLOAD_CONFIG_TEST_LOG_STR + ERROR_VARIABLE OMPOFFLOAD_CONFIG_TEST_LOG_STR +) + +# Append the compilation log to the log file +file(APPEND "${OMPOFFLOAD_CONFIG_TEST_LOG}" "${OMPOFFLOAD_CONFIG_TEST_LOG_STR}\n") + +# Check the compilation result and display a status message +if (OMPOFFLOAD_TEST_COMPILE_RESULT) + message(STATUS "OMP OFFLOAD: Supported") +else() + message(STATUS "OMP OFFLOAD: Unsupported") +endif() + +if (USM) + set(USM_FLAGS "-fopenmp-force-usm") + add_compile_definitions(_USM_) +endif() + +set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${OpenMP_Fortran_FLAGS} -fopenmp-version=51 --offload-arch=${GPU_ARCH} -fopenmp-offload-mandatory ${USM_FLAGS}") + +################# ROCm libraries ######################## +# Find rocFFT +find_package(rocfft REQUIRED) +set(rocfftlib ${ROCFFT_LIBRARIES}) +message(STATUS "rocFFT found : ${rocfftlib}") + +# Find rocBLAS +find_package(rocblas REQUIRED) +set(rocblaslib ${ROCBLAS_LIBRARIES}) +message(STATUS "rocblas found : ${rocblaslib}") + +# Find rocSOLVER +find_package(rocsolver REQUIRED) +set(rocsolverlib ${ROCSOLVER_LIBRARIES}) +message(STATUS "rocSOLVER found: ${rocsolverlib}") + +message(STATUS "ROCM include directory: ${ROCFFT_INCLUDE_DIRS}") +file(READ ${ROCFFT_INCLUDE_DIRS}/rocm-core/rocm_version.h FILE_CONTENT) + +# Use regular expressions to extract version numbers +string(REGEX MATCH "#define ROCM_VERSION_MAJOR[ \t]+([0-9]+)" _major_match "${FILE_CONTENT}") +string(REGEX MATCH "#define ROCM_VERSION_MINOR[ \t]+([0-9]+)" _minor_match "${FILE_CONTENT}") +string(REGEX MATCH "#define ROCM_VERSION_PATCH[ \t]+([0-9]+)" _patch_match "${FILE_CONTENT}") + +# If not found, default to 0 +if(_major_match) + string(REGEX REPLACE "#define ROCM_VERSION_MAJOR[ \t]+([0-9]+)" "\\1" ROCM_VERSION_MAJOR "${_major_match}") +else() + set(ROCM_VERSION_MAJOR 0) +endif() + +if(_minor_match) + string(REGEX REPLACE "#define ROCM_VERSION_MINOR[ \t]+([0-9]+)" "\\1" ROCM_VERSION_MINOR "${_minor_match}") +else() + set(ROCM_VERSION_MINOR 0) +endif() + +if(_patch_match) + string(REGEX REPLACE "#define ROCM_VERSION_PATCH[ \t]+([0-9]+)" "\\1" ROCM_VERSION_PATCH "${_patch_match}") +else() + set(ROCM_VERSION_PATCH 0) +endif() + +if ((ROCM_VERSION_MAJOR EQUAL 0) AND (ROCM_VERSION_MINOR EQUAL 0) AND (ROCM_VERSION_PATCH EQUAL 0)) + message(FATAL_ERROR "ROCm version not found") +else() + message(STATUS "ROCM version : ${ROCM_VERSION_MAJOR}.${ROCM_VERSION_MINOR}.${ROCM_VERSION_PATCH}") +endif() + +set(ROCM_VERSION_NUM ${ROCM_VERSION_MAJOR}.${ROCM_VERSION_MINOR}${ROCM_VERSION_PATCH}) + +##### BLAS/LAPACK ##### +find_library(BLIS_LIBRARY NAMES blis-mt blis) +find_library(FLAME_LIBRARY NAMES flame) +if(NOT FLAME_LIBRARY OR NOT BLIS_LIBRARY) + find_package(BLAS REQUIRED) + find_package(LAPACK REQUIRED) + list(APPEND BLAS_LIBRARIES ${LAPACK_LIBRARIES}) +else() + set(BLAS_LIBRARIES "${BLIS_LIBRARY}") + list(APPEND BLAS_LIBRARIES ${FLAME_LIBRARY}) +endif() +message(STATUS "Linear algebra libraries found : ${BLAS_LIBRARIES}") + +##### HIPFORT source ###### +file(GLOB_RECURSE hipfort_src + "${CMAKE_SOURCE_DIR}/../../lib/hipfort/*.F90" + "${CMAKE_SOURCE_DIR}/../../lib/hipfort/*.f90") + +##### Programs ############ + +add_executable(test_rocsolver_zhegvdx ${CMAKE_SOURCE_DIR}/rocsolver/test_rocsolver_zhegvdx.f90 ${hipfort_src}) +target_link_libraries(test_rocsolver_zhegvdx PUBLIC roc::rocsolver ${BLAS_LIBRARIES} flang_rt.hostdevice) +set_property(TARGET test_rocsolver_zhegvdx PROPERTY LINKER_LANGUAGE Fortran) + +#### Test #### +enable_testing() +add_test(NAME test_rocsolver_zhegvdx COMMAND test_rocsolver_zhegvdx) + + + + + diff --git a/test/openmp/rocsolver/test_rocsolver_zhegvdx.f90 b/test/openmp/rocsolver/test_rocsolver_zhegvdx.f90 new file mode 100644 index 00000000..3dd30744 --- /dev/null +++ b/test/openmp/rocsolver/test_rocsolver_zhegvdx.f90 @@ -0,0 +1,159 @@ +program test_rocsolver_zhegvdx + + use iso_fortran_env, only: i32=>int32, r32=>real32, r64=>real64 + use iso_c_binding + use hipfort + use hipfort_rocblas_enums + use hipfort_rocblas + use hipfort_rocsolver_enums + use hipfort_rocsolver + use omp_lib + + implicit none + + complex(r64), allocatable :: A_lapack(:,:), B_lapack(:,:) + complex(r64), pointer, contiguous :: A_rocsolver(:,:), B_rocsolver(:,:) + type(c_ptr) :: A_rocsolver_cptr, B_rocsolver_cptr + type(c_ptr) :: handle = c_null_ptr + integer(i32), parameter :: n = 3 + real(r64), parameter :: abstol = 1.0e-8_r64 + real(r64), parameter :: vl = 0.0_r64, vu = 0.0_r64 + integer(i32), parameter :: il = 1 + integer(i32), parameter :: iu = 2 + integer(i32), parameter :: expected_m = iu - il + 1 + integer(i32) :: lwork, liwork, lrwork, lwork_gpu + complex(r64), allocatable :: work(:) + real(r64), allocatable :: rwork(:) + integer(r64), allocatable :: iwork(:), ifail(:) + integer(i32) :: info, m, cerr + real(r64), parameter :: rtol = 1.0e-6_r64 + + complex(r64), allocatable :: Z(:,:), Zref(:,:) + real(r64), allocatable :: W(:), Wref(:) + + ! For the rocsolver + integer(i32) :: device_id + type(c_ptr) :: dA, dB, dW, dZ + + ! Get device id + device_id = omp_get_default_device() + write(*,*) "Device ID : ", device_id + + ! Fill the matrices + allocate(A_lapack(n,n), B_lapack(n,n)) + + A_lapack = reshape([ & + (12.952260828249717, 0.0), (-0.8136520598298568, -1.207368113317406), (6.021437566362058, -1.530839969546172), & + (-0.8136520598298568, 1.207368113317406), (4.084378436292846, 0.0), (0.862574753601529, -0.5636477131011359), & + (6.021437566362058, 1.530839969546172), (0.862574753601529, 0.5636477131011359), (13.110316501843691, 0.0) & + ], shape=[3,3], order=[2,1]) + + + B_lapack = reshape([ & + (4.769034939442435, 0.0), (2.9233910006323103, 2.248832817522154), (2.3758726221976016, 0.4747827863735063), & + (2.9233910006323103, -2.248832817522154), (13.323300302441238, 0.0), (3.5288683548091355, -1.5600560529683896), & + (2.3758726221976016, -0.4747827863735063), (3.5288683548091355, 1.5600560529683896), (13.826224543945107, 0.0) & + ], shape=[3,3], order=[2,1]) + + A_rocsolver_cptr = omp_target_alloc(2 * size(A_lapack) * c_sizeof(1_r64), device_id) + B_rocsolver_cptr = omp_target_alloc(2 * size(B_lapack) * c_sizeof(1_r64), device_id) + + call c_f_pointer(A_rocsolver_cptr, A_rocsolver, [n,n]) + call c_f_pointer(B_rocsolver_cptr, B_rocsolver, [n,n]) + + !$omp target map(to: A_lapack, B_lapack) has_device_addr(A_rocsolver, B_rocsolver) + A_rocsolver(:,:) = A_lapack(:,:) + B_rocsolver(:,:) = B_lapack(:,:) + !$omp end target + + nullify(A_rocsolver,B_rocsolver) + + ! First create the LAPACK reference + lwork = 2*n+n**2 + lrwork = 1 + 5*N + 2*N**2 + liwork = 3 + 5*N + allocate(work(lwork), rwork(lrwork), iwork(liwork)) + allocate(W(n)) + call zhegvd(1, 'V', 'U', n, A_lapack, n, B_lapack, n, W, work, lwork, rwork, lrwork, iwork, liwork, info) + if (info /= 0) error stop "LAPACK fail" + deallocate(work, rwork, iwork) + + write(*,*) "###LAPACK####" + write(*,*) "Eigenvalues :", W(:) + write(*,*) "Eigenvector 0 ", A_lapack(:,1) + write(*,*) "Eigenvector 1 ", A_lapack(:,2) + write(*,*) "Eigenvector 3 ", A_lapack(:,3) + + call move_alloc(W, Wref) + call move_alloc(A_lapack, Zref) + + allocate(W(n), Z(n,n)) + + m = -4 + info = -4 + + cerr = rocblas_create_handle(handle) + if (cerr /= rocblas_status_success) error stop "rocblas_create_handle failed" + + !$omp target data map(from: W, Z, m, info) + !$omp target data use_device_addr(m, info) + dW = get_device_pointer(W, device_id) + dZ = get_device_pointer(Z, device_id) + cerr = rocsolver_zhegvdx(handle, rocblas_eform_ax, rocblas_evect_original, rocblas_erange_index, rocblas_fill_upper, & + n, A_rocsolver_cptr, n, B_rocsolver_cptr, n, vl, vu, il, iu, m, dW, dZ, n, info) + if (cerr /= rocblas_status_success) error stop "rocsolver_zhegvdx failed" + + ! NOTE: it will be better to get the stream of the rocblas handler and sync that instead of the whole device + cerr = hipDeviceSynchronize() + if (cerr /= rocblas_status_success) error stop "hipDeviceSynchronize failed" + !$omp end target data + !$omp end target data + + if (info /= 0) then + write(*,*) "zhegvdx failed with info", info + error stop "FAILED" + endif + + write(*,*) "###ROCSOLVER####" + write(*,*) "Eigenvalues :", W(1:m) + write(*,*) "Eigenvector 0 ", Z(:,1) + write(*,*) "Eigenvector 1 ", Z(:,2) + + !! We call our interface not the one provided + cerr = rocblas_destroy_handle(handle) + if (cerr /= rocblas_status_success) error stop "rocblas_destroy_handle failed" + + call omp_target_free(A_rocsolver_cptr, device_id) + call omp_target_free(B_rocsolver_cptr, device_id) + + deallocate(A_lapack, B_lapack) + + write(*,*) "###CHECKS####" + if (expected_m /= m) then + error stop "Error(wrong number of states)" + else + write(*,*) "Correct: number of states" + end if + + if (norm2(W(1:m) - Wref(1:m)) / norm2(Wref(1:m)) > rtol) then + error stop "Error(wrong eigenvalues)" + else + write(*,*) "Correct: eigenvalues" + end if + + if (norm2(abs(Z(:,1:m)) - abs(Zref(:,1:m)))/norm2(abs(Zref(:,1:m))) > rtol) then + error stop "Error(wrong eigenvectors)" + else + write(*,*) "Correct: eigenvectors" + end if + +contains + + type(c_ptr) function get_device_pointer(host_data, device_id) + type(*), target, intent(in) :: host_data(..) + integer, intent(in) :: device_id + get_device_pointer = omp_get_mapped_ptr(c_loc(host_data), device_id) + if (.not. c_associated(get_device_pointer)) error stop "Error(get_device_pointer) : returned null pointer" + end function get_device_pointer + +end program test_rocsolver_zhegvdx