diff --git a/CMakeLists.txt b/CMakeLists.txt index 20184b948..76b06cce5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -608,6 +608,15 @@ add_to_config(_versionInfo HIP_VERSION_GITHASH "${HIP_VERSION_GITHASH}") file(WRITE ${PROJECT_BINARY_DIR}/bin/.hipVersion ${_versionInfo}) install(FILES ${PROJECT_BINARY_DIR}/bin/.hipVersion DESTINATION bin) +file(COPY ${CMAKE_SOURCE_DIR}/HIP/include/ + DESTINATION ${CMAKE_BINARY_DIR}/include) +file(COPY ${CMAKE_SOURCE_DIR}/include/ + DESTINATION ${CMAKE_BINARY_DIR}/include) +if(CHIP_BUILD_HIPBLAS) + file(COPY ${CMAKE_SOURCE_DIR}/H4I-HipBLAS/include/ + DESTINATION ${CMAKE_BINARY_DIR}/include) +endif() + # Setup .hipInfo. One for install and another for build directory. string(TIMESTAMP _timestamp UTC) set(_hipInfo_install "# Auto-generated by cmake on ${_timestamp} UTC\n") diff --git a/H4I-HipBLAS b/H4I-HipBLAS index 09d145a76..c255b7d00 160000 --- a/H4I-HipBLAS +++ b/H4I-HipBLAS @@ -1 +1 @@ -Subproject commit 09d145a76c053f8c32f6836cc9025cc25682cf4f +Subproject commit c255b7d00c916165ff375df423bddec1495d81ad diff --git a/HIPCC b/HIPCC index c92a7e0a3..480795b50 160000 --- a/HIPCC +++ b/HIPCC @@ -1 +1 @@ -Subproject commit c92a7e0a36c075f7a0eff6f6d2cf9c4ebc2cbbb1 +Subproject commit 480795b507ded4df00aec81a10bd553dd14b0611 diff --git a/include/hip/spirv_hip_complex.h b/include/hip/spirv_hip_complex.h index a0302c509..feb2f6cde 100644 --- a/include/hip/spirv_hip_complex.h +++ b/include/hip/spirv_hip_complex.h @@ -39,295 +39,132 @@ THE SOFTWARE. #endif #endif // !defined(__HIPCC_RTC__) -#if __cplusplus -#define COMPLEX_NEG_OP_OVERLOAD(type) \ - __HOST_DEVICE__ static inline type operator-(const type &op) { \ - type ret; \ - ret.x = -op.x; \ - ret.y = -op.y; \ - return ret; \ - } - -#define COMPLEX_EQ_OP_OVERLOAD(type) \ - __HOST_DEVICE__ static inline bool operator==(const type &lhs, \ - const type &rhs) { \ - return lhs.x == rhs.x && lhs.y == rhs.y; \ - } - -#define COMPLEX_NE_OP_OVERLOAD(type) \ - __HOST_DEVICE__ static inline bool operator!=(const type &lhs, \ - const type &rhs) { \ - return !(lhs == rhs); \ - } - -#define COMPLEX_ADD_OP_OVERLOAD(type) \ - __HOST_DEVICE__ static inline type operator+(const type &lhs, \ - const type &rhs) { \ - type ret; \ - ret.x = lhs.x + rhs.x; \ - ret.y = lhs.y + rhs.y; \ - return ret; \ - } - -#define COMPLEX_SUB_OP_OVERLOAD(type) \ - __HOST_DEVICE__ static inline type operator-(const type &lhs, \ - const type &rhs) { \ - type ret; \ - ret.x = lhs.x - rhs.x; \ - ret.y = lhs.y - rhs.y; \ - return ret; \ - } - -#define COMPLEX_MUL_OP_OVERLOAD(type) \ - __HOST_DEVICE__ static inline type operator*(const type &lhs, \ - const type &rhs) { \ - type ret; \ - ret.x = lhs.x * rhs.x - lhs.y * rhs.y; \ - ret.y = lhs.x * rhs.y + lhs.y * rhs.x; \ - return ret; \ - } - -#define COMPLEX_DIV_OP_OVERLOAD(type) \ - __HOST_DEVICE__ static inline type operator/(const type &lhs, \ - const type &rhs) { \ - type ret; \ - ret.x = (lhs.x * rhs.x + lhs.y * rhs.y); \ - ret.y = (rhs.x * lhs.y - lhs.x * rhs.y); \ - ret.x = ret.x / (rhs.x * rhs.x + rhs.y * rhs.y); \ - ret.y = ret.y / (rhs.x * rhs.x + rhs.y * rhs.y); \ - return ret; \ - } - -#define COMPLEX_ADD_PREOP_OVERLOAD(type) \ - __HOST_DEVICE__ static inline type &operator+=(type &lhs, const type &rhs) { \ - lhs.x += rhs.x; \ - lhs.y += rhs.y; \ - return lhs; \ - } - -#define COMPLEX_SUB_PREOP_OVERLOAD(type) \ - __HOST_DEVICE__ static inline type &operator-=(type &lhs, const type &rhs) { \ - lhs.x -= rhs.x; \ - lhs.y -= rhs.y; \ - return lhs; \ - } - -#define COMPLEX_MUL_PREOP_OVERLOAD(type) \ - __HOST_DEVICE__ static inline type &operator*=(type &lhs, const type &rhs) { \ - lhs = lhs * rhs; \ - return lhs; \ - } - -#define COMPLEX_DIV_PREOP_OVERLOAD(type) \ - __HOST_DEVICE__ static inline type &operator/=(type &lhs, const type &rhs) { \ - lhs = lhs / rhs; \ - return lhs; \ - } - -#define COMPLEX_SCALAR_PRODUCT(type, type1) \ - __HOST_DEVICE__ static inline type operator*(const type &lhs, type1 rhs) { \ - type ret; \ - ret.x = lhs.x * rhs; \ - ret.y = lhs.y * rhs; \ - return ret; \ - } - -#endif - -class hipFloatComplex : public HIP_vector_type {}; +typedef float2 hipFloatComplex; __HOST_DEVICE__ static inline float hipCrealf(hipFloatComplex z) { return z.x; } __HOST_DEVICE__ static inline float hipCimagf(hipFloatComplex z) { return z.y; } -__HOST_DEVICE__ static inline hipFloatComplex make_hipFloatComplex(float a, - float b) { - hipFloatComplex z; - z.x = a; - z.y = b; - return z; +__HOST_DEVICE__ static inline hipFloatComplex make_hipFloatComplex(float a, float b) { + hipFloatComplex z; + z.x = a; + z.y = b; + return z; } __HOST_DEVICE__ static inline hipFloatComplex hipConjf(hipFloatComplex z) { - hipFloatComplex ret; - ret.x = z.x; - ret.y = -z.y; - return ret; + hipFloatComplex ret; + ret.x = z.x; + ret.y = -z.y; + return ret; } __HOST_DEVICE__ static inline float hipCsqabsf(hipFloatComplex z) { - return z.x * z.x + z.y * z.y; + return z.x * z.x + z.y * z.y; } -__HOST_DEVICE__ static inline hipFloatComplex hipCaddf(hipFloatComplex p, - hipFloatComplex q) { - return make_hipFloatComplex(p.x + q.x, p.y + q.y); +__HOST_DEVICE__ static inline hipFloatComplex hipCaddf(hipFloatComplex p, hipFloatComplex q) { + return make_hipFloatComplex(p.x + q.x, p.y + q.y); } -__HOST_DEVICE__ static inline hipFloatComplex hipCsubf(hipFloatComplex p, - hipFloatComplex q) { - return make_hipFloatComplex(p.x - q.x, p.y - q.y); +__HOST_DEVICE__ static inline hipFloatComplex hipCsubf(hipFloatComplex p, hipFloatComplex q) { + return make_hipFloatComplex(p.x - q.x, p.y - q.y); } -__HOST_DEVICE__ static inline hipFloatComplex hipCmulf(hipFloatComplex p, - hipFloatComplex q) { - return make_hipFloatComplex(p.x * q.x - p.y * q.y, p.y * q.x + p.x * q.y); +__HOST_DEVICE__ static inline hipFloatComplex hipCmulf(hipFloatComplex p, hipFloatComplex q) { + return make_hipFloatComplex(p.x * q.x - p.y * q.y, p.y * q.x + p.x * q.y); } -__HOST_DEVICE__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, - hipFloatComplex q) { - float sqabs = hipCsqabsf(q); - hipFloatComplex ret; - ret.x = (p.x * q.x + p.y * q.y) / sqabs; - ret.y = (p.y * q.x - p.x * q.y) / sqabs; - return ret; +__HOST_DEVICE__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hipFloatComplex q) { + float sqabs = hipCsqabsf(q); + hipFloatComplex ret; + ret.x = (p.x * q.x + p.y * q.y) / sqabs; + ret.y = (p.y * q.x - p.x * q.y) / sqabs; + return ret; } -__HOST_DEVICE__ static inline float hipCabsf(hipFloatComplex z) { - return sqrtf(hipCsqabsf(z)); -} +__HOST_DEVICE__ static inline float hipCabsf(hipFloatComplex z) { return sqrtf(hipCsqabsf(z)); } -class hipDoubleComplex : public HIP_vector_type {}; -__HOST_DEVICE__ static inline double hipCreal(hipDoubleComplex z) { - return z.x; -} +typedef double2 hipDoubleComplex; -__HOST_DEVICE__ static inline double hipCimag(hipDoubleComplex z) { - return z.y; -} +__HOST_DEVICE__ static inline double hipCreal(hipDoubleComplex z) { return z.x; } -__HOST_DEVICE__ static inline hipDoubleComplex make_hipDoubleComplex(double a, - double b) { - hipDoubleComplex z; - z.x = a; - z.y = b; - return z; +__HOST_DEVICE__ static inline double hipCimag(hipDoubleComplex z) { return z.y; } + +__HOST_DEVICE__ static inline hipDoubleComplex make_hipDoubleComplex(double a, double b) { + hipDoubleComplex z; + z.x = a; + z.y = b; + return z; } __HOST_DEVICE__ static inline hipDoubleComplex hipConj(hipDoubleComplex z) { - hipDoubleComplex ret; - ret.x = z.x; - ret.y = -z.y; - return ret; + hipDoubleComplex ret; + ret.x = z.x; + ret.y = -z.y; + return ret; } __HOST_DEVICE__ static inline double hipCsqabs(hipDoubleComplex z) { - return z.x * z.x + z.y * z.y; -} - -__HOST_DEVICE__ static inline hipDoubleComplex hipCadd(hipDoubleComplex p, - hipDoubleComplex q) { - return make_hipDoubleComplex(p.x + q.x, p.y + q.y); + return z.x * z.x + z.y * z.y; } -__HOST_DEVICE__ static inline hipDoubleComplex hipCsub(hipDoubleComplex p, - hipDoubleComplex q) { - return make_hipDoubleComplex(p.x - q.x, p.y - q.y); +__HOST_DEVICE__ static inline hipDoubleComplex hipCadd(hipDoubleComplex p, hipDoubleComplex q) { + return make_hipDoubleComplex(p.x + q.x, p.y + q.y); } -__HOST_DEVICE__ static inline hipDoubleComplex hipCmul(hipDoubleComplex p, - hipDoubleComplex q) { - return make_hipDoubleComplex(p.x * q.x - p.y * q.y, p.y * q.x + p.x * q.y); +__HOST_DEVICE__ static inline hipDoubleComplex hipCsub(hipDoubleComplex p, hipDoubleComplex q) { + return make_hipDoubleComplex(p.x - q.x, p.y - q.y); } -__HOST_DEVICE__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, - hipDoubleComplex q) { - double sqabs = hipCsqabs(q); - hipDoubleComplex ret; - ret.x = (p.x * q.x + p.y * q.y) / sqabs; - ret.y = (p.y * q.x - p.x * q.y) / sqabs; - return ret; +__HOST_DEVICE__ static inline hipDoubleComplex hipCmul(hipDoubleComplex p, hipDoubleComplex q) { + return make_hipDoubleComplex(p.x * q.x - p.y * q.y, p.y * q.x + p.x * q.y); } -__HOST_DEVICE__ static inline double hipCabs(hipDoubleComplex z) { - return sqrt(hipCsqabs(z)); +__HOST_DEVICE__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, hipDoubleComplex q) { + double sqabs = hipCsqabs(q); + hipDoubleComplex ret; + ret.x = (p.x * q.x + p.y * q.y) / sqabs; + ret.y = (p.y * q.x - p.x * q.y) / sqabs; + return ret; } -#if __cplusplus - -COMPLEX_NEG_OP_OVERLOAD(hipFloatComplex) -COMPLEX_EQ_OP_OVERLOAD(hipFloatComplex) -COMPLEX_NE_OP_OVERLOAD(hipFloatComplex) -COMPLEX_ADD_OP_OVERLOAD(hipFloatComplex) -COMPLEX_SUB_OP_OVERLOAD(hipFloatComplex) -COMPLEX_MUL_OP_OVERLOAD(hipFloatComplex) -COMPLEX_DIV_OP_OVERLOAD(hipFloatComplex) -COMPLEX_ADD_PREOP_OVERLOAD(hipFloatComplex) -COMPLEX_SUB_PREOP_OVERLOAD(hipFloatComplex) -COMPLEX_MUL_PREOP_OVERLOAD(hipFloatComplex) -COMPLEX_DIV_PREOP_OVERLOAD(hipFloatComplex) -COMPLEX_SCALAR_PRODUCT(hipFloatComplex, unsigned short) -COMPLEX_SCALAR_PRODUCT(hipFloatComplex, signed short) -COMPLEX_SCALAR_PRODUCT(hipFloatComplex, unsigned int) -COMPLEX_SCALAR_PRODUCT(hipFloatComplex, signed int) -COMPLEX_SCALAR_PRODUCT(hipFloatComplex, float) -COMPLEX_SCALAR_PRODUCT(hipFloatComplex, unsigned long) -COMPLEX_SCALAR_PRODUCT(hipFloatComplex, signed long) -COMPLEX_SCALAR_PRODUCT(hipFloatComplex, double) -COMPLEX_SCALAR_PRODUCT(hipFloatComplex, signed long long) -COMPLEX_SCALAR_PRODUCT(hipFloatComplex, unsigned long long) - -COMPLEX_NEG_OP_OVERLOAD(hipDoubleComplex) -COMPLEX_EQ_OP_OVERLOAD(hipDoubleComplex) -COMPLEX_NE_OP_OVERLOAD(hipDoubleComplex) -COMPLEX_ADD_OP_OVERLOAD(hipDoubleComplex) -COMPLEX_SUB_OP_OVERLOAD(hipDoubleComplex) -COMPLEX_MUL_OP_OVERLOAD(hipDoubleComplex) -COMPLEX_DIV_OP_OVERLOAD(hipDoubleComplex) -COMPLEX_ADD_PREOP_OVERLOAD(hipDoubleComplex) -COMPLEX_SUB_PREOP_OVERLOAD(hipDoubleComplex) -COMPLEX_MUL_PREOP_OVERLOAD(hipDoubleComplex) -COMPLEX_DIV_PREOP_OVERLOAD(hipDoubleComplex) -COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned short) -COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, signed short) -COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned int) -COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, signed int) -COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, float) -COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned long) -COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, signed long) -COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, double) -COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, signed long long) -COMPLEX_SCALAR_PRODUCT(hipDoubleComplex, unsigned long long) - -#endif +__HOST_DEVICE__ static inline double hipCabs(hipDoubleComplex z) { return sqrt(hipCsqabs(z)); } typedef hipFloatComplex hipComplex; __HOST_DEVICE__ static inline hipComplex make_hipComplex(float x, float y) { - return make_hipFloatComplex(x, y); + return make_hipFloatComplex(x, y); } -__HOST_DEVICE__ static inline hipFloatComplex -hipComplexDoubleToFloat(hipDoubleComplex z) { - return make_hipFloatComplex((float)z.x, (float)z.y); +__HOST_DEVICE__ static inline hipFloatComplex hipComplexDoubleToFloat(hipDoubleComplex z) { + return make_hipFloatComplex((float)z.x, (float)z.y); } -__HOST_DEVICE__ static inline hipDoubleComplex -hipComplexFloatToDouble(hipFloatComplex z) { - return make_hipDoubleComplex((double)z.x, (double)z.y); +__HOST_DEVICE__ static inline hipDoubleComplex hipComplexFloatToDouble(hipFloatComplex z) { + return make_hipDoubleComplex((double)z.x, (double)z.y); } -__HOST_DEVICE__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q, - hipComplex r) { - float real = (p.x * q.x) + r.x; - float imag = (q.x * p.y) + r.y; +__HOST_DEVICE__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q, hipComplex r) { + float real = (p.x * q.x) + r.x; + float imag = (q.x * p.y) + r.y; - real = -(p.y * q.y) + real; - imag = (p.x * q.y) + imag; + real = -(p.y * q.y) + real; + imag = (p.x * q.y) + imag; - return make_hipComplex(real, imag); + return make_hipComplex(real, imag); } -__HOST_DEVICE__ static inline hipDoubleComplex -hipCfma(hipDoubleComplex p, hipDoubleComplex q, hipDoubleComplex r) { - double real = (p.x * q.x) + r.x; - double imag = (q.x * p.y) + r.y; +__HOST_DEVICE__ static inline hipDoubleComplex hipCfma(hipDoubleComplex p, hipDoubleComplex q, + hipDoubleComplex r) { + double real = (p.x * q.x) + r.x; + double imag = (q.x * p.y) + r.y; - real = -(p.y * q.y) + real; - imag = (p.x * q.y) + imag; + real = -(p.y * q.y) + real; + imag = (p.x * q.y) + imag; - return make_hipDoubleComplex(real, imag); + return make_hipDoubleComplex(real, imag); } #endif // SPIRV_HIP_COMPLEX_H diff --git a/scripts/unit_tests.sh b/scripts/unit_tests.sh index f9cbdfcf4..7e2009706 100755 --- a/scripts/unit_tests.sh +++ b/scripts/unit_tests.sh @@ -148,13 +148,6 @@ else cmake ../ -DCMAKE_BUILD_TYPE="$build_type" -DCHIP_BUILD_HIPBLAS=ON make all build_tests install -j $(nproc) #&> /dev/null echo "chipStar build complete." - - # Build libCEED - export CHIPSTAR_INSTALL_DIR=`pwd`/install # set CHIPSTAR_INSTALL_DIR to current build dir - export HIP_DIR=${CHIPSTAR_INSTALL_DIR} - export PATH=$PATH:${CHIPSTAR_INSTALL_DIR}/bin - export LIBCEED_DIR=`pwd`/libCEED - ../scripts/compile_libceed.sh ${CHIPSTAR_INSTALL_DIR} fi module unload opencl/dgpu @@ -166,9 +159,6 @@ echo "begin igpu_level0_failed_tests" # module load level-zero/igpu # module list ../scripts/check.py ./ igpu level0 --num-threads=${num_threads} --timeout=$timeout --num-tries=$num_tries --modules=on | tee igpu_level0_make_check_result.txt -pushd ${LIBCEED_DIR} -make FC= CC=clang CXX=clang++ BACKENDS="/gpu/hip/ref /gpu/hip/shared /gpu/hip/gen" test -j | tee igpu_level0_make_check_result.txt -popd # module unload level-zero/igpu echo "end igpu_level0_failed_tests" @@ -177,9 +167,6 @@ echo "begin dgpu_level0_failed_tests" # module load level-zero/dgpu # module list ../scripts/check.py ./ dgpu level0 --num-threads=${num_threads} --timeout=$timeout --num-tries=$num_tries --modules=on | tee dgpu_level0_make_check_result.txt -pushd ${LIBCEED_DIR} -make FC= CC=clang CXX=clang++ BACKENDS="/gpu/hip/ref /gpu/hip/shared /gpu/hip/gen" test -j | tee dgpu_level0_make_check_result.txt -popd # module unload level-zero/dgpu echo "end dgpu_level0_failed_tests" @@ -188,9 +175,6 @@ echo "begin igpu_opencl_failed_tests" # module load opencl/igpu # module list ../scripts/check.py ./ igpu opencl --num-threads=${num_threads} --timeout=$timeout --num-tries=$num_tries --modules=on | tee igpu_opencl_make_check_result.txt -pushd ${LIBCEED_DIR} -make FC= CC=clang CXX=clang++ BACKENDS="/gpu/hip/ref /gpu/hip/shared /gpu/hip/gen" test -j | tee igpu_opencl_make_check_result.txt -popd # module unload opencl/igpu echo "end igpu_opencl_failed_tests" @@ -200,9 +184,6 @@ echo "begin dgpu_opencl_failed_tests" # module load opencl/dgpu # sets CHIP_BE # module list ../scripts/check.py ./ dgpu opencl --num-threads=${num_threads} --timeout=$timeout --num-tries=$num_tries --modules=on | tee dgpu_opencl_make_check_result.txt -pushd ${LIBCEED_DIR} -make FC= CC=clang CXX=clang++ BACKENDS="/gpu/hip/ref /gpu/hip/shared /gpu/hip/gen" test -j | tee dgpu_opencl_make_check_result.txt -popd # module unload opencl/dgpu intel/opencl echo "end dgpu_opencl_failed_tests" @@ -218,18 +199,6 @@ function check_tests { fi } -function check_libceed { - file="$1" - if grep -q "not ok" "$file"; then - echo "FAIL" - awk '/Test Summary Report/,EOF' "$file" - return 1 - else - echo "PASS" - return 0 - fi -} - overall_status=0 set +e echo "RESULTS:" @@ -246,17 +215,4 @@ do fi done -for test_result in dgpu_opencl_make_check_result.txt \ - igpu_opencl_make_check_result.txt \ - igpu_level0_make_check_result.txt \ - dgpu_level0_make_check_result.txt -do - echo -n "${test_result}: " - check_libceed "${test_result}" - test_status=$? - if [ $test_status -eq 1 ]; then - overall_status=1 - fi -done - exit $overall_status diff --git a/tests/compiler/CMakeLists.txt b/tests/compiler/CMakeLists.txt index 1220a66f8..0ea26dd78 100644 --- a/tests/compiler/CMakeLists.txt +++ b/tests/compiler/CMakeLists.txt @@ -74,6 +74,10 @@ add_hipcc_test(TestHipccAcceptCppFiles.cpp HIPCC_OPTIONS) add_hipcc_test(Test513Regression.hip HIPCC_OPTIONS) +if (CHIP_BUILD_HIPBLAS) + add_hipcc_test(hipBlas-ccompat.c HIPCC_OPTIONS -c) +endif() + add_test(NAME "TestHipccNeedsDashO" COMMAND ${CMAKE_BINARY_DIR}/bin/hipcc ${CMAKE_CURRENT_SOURCE_DIR}/TestHipccNeedDashO.cc) @@ -118,3 +122,4 @@ add_hipcc_test(TestAlignAttr.hip HIPCC_OPTIONS -fsyntax-only) # Check __FAST_MATH__ is set for -ffast-math and preprocessor guards # using it are not hiding errors. add_hipcc_test(TestFastMath.hip HIPCC_OPTIONS -fsyntax-only -ffast-math) + diff --git a/tests/compiler/hipBlas-ccompat.c b/tests/compiler/hipBlas-ccompat.c new file mode 100644 index 000000000..d3b5adacf --- /dev/null +++ b/tests/compiler/hipBlas-ccompat.c @@ -0,0 +1,6 @@ +#include +#include + +int main() { + +} \ No newline at end of file