diff --git a/.github/workflows/_Metax-X86.yaml b/.github/workflows/_Metax-X86.yaml index 486236955ad..3d2b6cb65fc 100644 --- a/.github/workflows/_Metax-X86.yaml +++ b/.github/workflows/_Metax-X86.yaml @@ -78,7 +78,7 @@ jobs: run: | cd backends/metax_gpu/tests - bash run_test.sh -j 8 + bash run_test.sh -j 32 - name: push whl env: diff --git a/.github/workflows/_Metax_work_private.yaml b/.github/workflows/_Metax_work_private.yaml index 3c1e163537a..fc65426c99c 100644 --- a/.github/workflows/_Metax_work_private.yaml +++ b/.github/workflows/_Metax_work_private.yaml @@ -79,7 +79,7 @@ jobs: run: | cd backends/metax_gpu/tests - bash run_test.sh -j 8 + bash run_test.sh -j 32 - name: push whl env: diff --git a/backends/metax_gpu/runtime/runtime.cc b/backends/metax_gpu/runtime/runtime.cc index 494b1a71258..9460cf574da 100644 --- a/backends/metax_gpu/runtime/runtime.cc +++ b/backends/metax_gpu/runtime/runtime.cc @@ -579,7 +579,8 @@ C_Status AsyncMemCpyH2D(const C_Device device, return C_ERROR; } - cudaErr = cudaMemcpyAsync(dst, src, size, cudaMemcpyHostToDevice); + cudaErr = cudaMemcpyAsync( + dst, src, size, cudaMemcpyHostToDevice, (cudaStream_t)stream); if (cudaErr != cudaSuccess) { return C_ERROR; } @@ -605,7 +606,8 @@ C_Status AsyncMemCpyD2H(const C_Device device, return C_ERROR; } - cudaErr = cudaMemcpyAsync(dst, src, size, cudaMemcpyDeviceToHost); + cudaErr = cudaMemcpyAsync( + dst, src, size, cudaMemcpyDeviceToHost, (cudaStream_t)stream); if (cudaErr != cudaSuccess) { return C_ERROR; } @@ -633,7 +635,8 @@ C_Status AsyncMemCpyD2D(const C_Device device, return C_ERROR; } - cudaErr = cudaMemcpyAsync(dst, src, size, cudaMemcpyDeviceToDevice); + cudaErr = cudaMemcpyAsync( + dst, src, size, cudaMemcpyDeviceToDevice, (cudaStream_t)stream); if (cudaErr != cudaSuccess) { return C_ERROR; }