Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions paddle/common/flags.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1713,3 +1713,7 @@ PHI_DEFINE_EXPORTED_string(cusolver_dir, // NOLINT
PHI_DEFINE_EXPORTED_string(cusparse_dir, // NOLINT
"",
"Specify path for loading libcusparse.so.*.");
// Root directory of the pip-installed NVIDIA packages on Windows. The dynamic
// loader registers "<win_cuda_bin_dir>\<component>\bin" (cublas, cudnn, ...)
// as DLL search directories; the Python package sets this flag to
// "<paddle package dir>\..\nvidia". Defaults to an empty string.
PHI_DEFINE_EXPORTED_string(
win_cuda_bin_dir, // NOLINT
"",
"Specify path for loading *.dll about cuda on windows");
123 changes: 114 additions & 9 deletions paddle/phi/backends/dynload/dynamic_loader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ limitations under the License. */
#include "paddle/phi/backends/dynload/dynamic_loader.h"
#include <dirent.h>

#include <codecvt>
#include <cstdlib>
#include <string>
#include <vector>
Expand Down Expand Up @@ -45,6 +46,7 @@ COMMON_DECLARE_string(cusparselt_dir);
COMMON_DECLARE_string(curand_dir);
COMMON_DECLARE_string(cusolver_dir);
COMMON_DECLARE_string(cusparse_dir);
COMMON_DECLARE_string(win_cuda_bin_dir);
#ifdef PADDLE_WITH_HIP

PHI_DEFINE_string(miopen_dir,
Expand Down Expand Up @@ -132,8 +134,12 @@ static constexpr char* win_cufft_lib =

static inline std::string join(const std::string& part1,
const std::string& part2) {
// directory separator
// directory separator
#if defined(_WIN32)
const char sep = '\\';
#else
const char sep = '/';
#endif
if (!part2.empty() && part2.front() == sep) {
return part2;
}
Expand Down Expand Up @@ -263,6 +269,26 @@ static inline void* GetDsoHandleFromSearchPath(
#else
int dynload_flags = 0;
#endif // !_WIN32
#if defined(_WIN32)
  // Sub-packages below FLAGS_win_cuda_bin_dir whose "bin" directory holds CUDA
  // DLLs. Each one is registered as <FLAGS_win_cuda_bin_dir>\<name>\bin.
  const std::vector<std::wstring> cuda_bin_search_path = {
      L"cublas",
      L"cuda_nvrtc",
      L"cuda_runtime",
      L"cudnn",
      L"cufft",
      L"curand",
      L"cusolver",
      L"cusparse",
      L"nvjitlink",
  };
  // Skip registration when the flag is unset: with an empty root the paths
  // would collapse to "\<name>\bin", which is not the absolute directory that
  // AddDllDirectory expects.
  if (!FLAGS_win_cuda_bin_dir.empty()) {
    // The flag is a narrow string (converted as UTF-8). The conversion does
    // not depend on the component, so do it once instead of once per entry.
    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
    const std::wstring win_path_wstring =
        converter.from_bytes(FLAGS_win_cuda_bin_dir);
    for (const auto& component : cuda_bin_search_path) {
      const std::wstring bin_dir =
          win_path_wstring + L"\\" + component + L"\\bin";
      // The returned cookie is discarded, so the directory is never removed
      // again through RemoveDllDirectory.
      AddDllDirectory(bin_dir.c_str());
    }
  }
#endif
std::vector<std::string> dso_names = split(dso_name, ";");
void* dso_handle = nullptr;
for (auto const& dso : dso_names) {
Expand Down Expand Up @@ -324,8 +350,26 @@ void* GetCublasDsoHandle() {
#if defined(__APPLE__) || defined(__OSX__)
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcublas.dylib");
#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
return GetDsoHandleFromSearchPath(
FLAGS_cuda_dir, win_cublas_lib, true, {cuda_lib_path});
if (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) {
#ifdef WITH_PIP_CUDA_LIBRARIES
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "cublas64_11.dll");
#else
return GetDsoHandleFromSearchPath(
FLAGS_cuda_dir, win_cublas_lib, true, {cuda_lib_path});
#endif
} else if (CUDA_VERSION >= 12000 && CUDA_VERSION <= 12030) {
#ifdef WITH_PIP_CUDA_LIBRARIES
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "cublas64_12.dll");
#else
return GetDsoHandleFromSearchPath(
FLAGS_cuda_dir, win_cublas_lib, true, {cuda_lib_path});
#endif
} else {
std::string warning_msg(
"Your CUDA_VERSION is less than 11 or greater than 12, paddle "
"temporarily no longer supports");
return nullptr;
}
#elif defined(__linux__) && defined(PADDLE_WITH_CUDA)
if (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) {
#ifdef WITH_PIP_CUDA_LIBRARIES
Expand Down Expand Up @@ -403,8 +447,23 @@ void* GetCUDNNDsoHandle() {
"Toolkit\\CUDA\\v10.0\n"
"You should do this according to your CUDA installation directory and "
"CUDNN version.");
return GetDsoHandleFromSearchPath(
FLAGS_cudnn_dir, win_cudnn_lib, true, {cuda_lib_path}, win_warn_meg);
if (CUDA_VERSION >= 11000 && CUDA_VERSION < 12030) {
#ifdef WITH_PIP_CUDA_LIBRARIES
return GetDsoHandleFromSearchPath(
FLAGS_cuda_dir, "cudnn64_8.dll", true, {cuda_lib_path}, win_warn_meg);
#else
return GetDsoHandleFromSearchPath(
FLAGS_cuda_dir, win_cudnn_lib, true, {cuda_lib_path}, win_warn_meg);
#endif
} else if (CUDA_VERSION >= 12030) {
#ifdef WITH_PIP_CUDA_LIBRARIES
return GetDsoHandleFromSearchPath(
FLAGS_cuda_dir, "cudnn64_9.dll", true, {cuda_lib_path}, win_warn_meg);
#else
return GetDsoHandleFromSearchPath(
FLAGS_cuda_dir, win_cudnn_lib, true, {cuda_lib_path}, win_warn_meg);
#endif
}
#elif defined(PADDLE_WITH_HIP)
return GetDsoHandleFromSearchPath(FLAGS_miopen_dir, "libMIOpen.so", false);
#else
Expand Down Expand Up @@ -461,8 +520,13 @@ void* GetCurandDsoHandle() {
#if defined(__APPLE__) || defined(__OSX__)
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcurand.dylib");
#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
#ifdef WITH_PIP_CUDA_LIBRARIES
return GetDsoHandleFromSearchPath(
FLAGS_cuda_dir, "curand64_10.dll", true, {cuda_lib_path});
#else
return GetDsoHandleFromSearchPath(
FLAGS_cuda_dir, win_curand_lib, true, {cuda_lib_path});
#endif
#elif defined(PADDLE_WITH_HIP)
return GetDsoHandleFromSearchPath(FLAGS_rocm_dir, "libhiprand.so");
#else
Expand Down Expand Up @@ -500,8 +564,13 @@ void* GetCusolverDsoHandle() {
#if defined(__APPLE__) || defined(__OSX__)
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcusolver.dylib");
#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
#ifdef WITH_PIP_CUDA_LIBRARIES
return GetDsoHandleFromSearchPath(
FLAGS_cuda_dir, "cusolver64_11.dll", true, {cuda_lib_path});
#else
return GetDsoHandleFromSearchPath(
FLAGS_cuda_dir, win_cusolver_lib, true, {cuda_lib_path});
#endif
#else
#ifdef WITH_PIP_CUDA_LIBRARIES
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcusolver.so.11");
Expand All @@ -515,8 +584,26 @@ void* GetCusparseDsoHandle() {
#if defined(__APPLE__) || defined(__OSX__)
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcusparse.dylib");
#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
return GetDsoHandleFromSearchPath(
FLAGS_cuda_dir, win_cusparse_lib, true, {cuda_lib_path});
if (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) {
#ifdef WITH_PIP_CUDA_LIBRARIES
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "cusparse64_11.dll");
#else
return GetDsoHandleFromSearchPath(
FLAGS_cuda_dir, win_cusparse_lib, true, {cuda_lib_path});
#endif
} else if (CUDA_VERSION >= 12000 && CUDA_VERSION <= 12030) {
#ifdef WITH_PIP_CUDA_LIBRARIES
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "cusparse64_12.dll");
#else
return GetDsoHandleFromSearchPath(
FLAGS_cuda_dir, win_cusparse_lib, true, {cuda_lib_path});
#endif
} else {
std::string warning_msg(
"Your CUDA_VERSION is less than 11 or greater than 12, paddle "
"temporarily no longer supports");
return nullptr;
}
#elif defined(__linux__) && defined(PADDLE_WITH_CUDA)
if (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) {
#ifdef WITH_PIP_CUDA_LIBRARIES
Expand Down Expand Up @@ -709,8 +796,26 @@ void* GetCUFFTDsoHandle() {
return nullptr;
}
#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
return GetDsoHandleFromSearchPath(
FLAGS_cuda_dir, win_cufft_lib, true, {cuda_lib_path});
if (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) {
#ifdef WITH_PIP_CUDA_LIBRARIES
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "cufft64_10.dll");
#else
return GetDsoHandleFromSearchPath(
FLAGS_cuda_dir, win_cufft_lib, true, {cuda_lib_path});
#endif
} else if (CUDA_VERSION >= 12000 && CUDA_VERSION <= 12030) {
#ifdef WITH_PIP_CUDA_LIBRARIES
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "cufft64_11.dll");
#else
return GetDsoHandleFromSearchPath(
FLAGS_cuda_dir, win_cufft_lib, true, {cuda_lib_path});
#endif
} else {
std::string warning_msg(
"Your CUDA_VERSION is less than 11 or greater than 12, paddle "
"temporarily no longer supports");
return nullptr;
}
#else
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcufft.so");
#endif
Expand Down
12 changes: 9 additions & 3 deletions paddle/phi/common/port.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
#include <memory>
#include <stdexcept>
#include <string>

#include "glog/logging.h"

#if !defined(_WIN32)
Expand All @@ -42,7 +41,14 @@ void *dlsym(void *handle, const char *symbol_name) {

void *dlopen(const char *filename, int flag) {
std::string file_name(filename);
HMODULE hModule = LoadLibrary(file_name.c_str());
HMODULE hModule = nullptr;
#ifdef WITH_PIP_CUDA_LIBRARIES
hModule =
LoadLibraryEx(file_name.c_str(), NULL, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS);
#endif
if (!hModule) {
hModule = LoadLibrary(file_name.c_str());
}
if (!hModule) {
if (flag) {
throw std::runtime_error(file_name + " not found.");
Expand Down Expand Up @@ -72,7 +78,7 @@ int gettimeofday(struct timeval *tp, void *tzp) {

return (0);
}
#endif // !_WIN32
#endif // !_WIN32

void ExecShellCommand(const std::string &cmd, std::string *message) {
std::array<char, 128> buffer;
Expand Down
113 changes: 113 additions & 0 deletions python/paddle/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,7 @@
if os.path.exists(cuh_file):
os.environ.setdefault('runtime_include_dir', runtime_include_dir)


if is_compiled_with_cuda():
import os
import platform
Expand Down Expand Up @@ -601,6 +602,118 @@
cupti_dir_lib_path = package_dir + "/.." + "/nvidia/cuda_cupti/lib"
set_flags({"FLAGS_cupti_dir": cupti_dir_lib_path})

# Windows x86_64 with pip-installed CUDA libraries: pass the sibling "nvidia"
# package directory to the C++ loader via FLAGS_win_cuda_bin_dir, register the
# DLL directories with the OS, and preload every DLL found there.
elif (
platform.system() == 'Windows'
and platform.machine() in ('x86_64', 'AMD64')
and paddle.version.with_pip_cuda_libraries == 'ON'
):
package_dir = os.path.dirname(os.path.abspath(__file__))
# <package_dir>\..\nvidia is the root under which <component>\bin is searched.
win_cuda_bin_path = package_dir + "\\.." + "\\nvidia"
set_flags({"FLAGS_win_cuda_bin_dir": win_cuda_bin_path})

import sys

if sys.platform == 'win32':
# NOTE(review): pfiles_path is assigned but never read in the visible code.
pfiles_path = os.getenv('ProgramFiles', 'C:\\Program Files')
# Candidate DLL directories: <exec_prefix>\Library\bin and <package>\libs.
py_dll_path = os.path.join(sys.exec_prefix, 'Library', 'bin')
th_dll_path = os.path.join(os.path.dirname(__file__), 'libs')
site_cuda_base_path = os.path.join(
os.path.dirname(__file__), '..', 'nvidia'
)
# Sub-packages of "nvidia" whose "bin" directory is searched for DLLs.
site_cuda_list = [
"cublas",
"cuda_nvrtc",
"cuda_runtime",
"cudnn",
"cufft",
"curand",
"cusolver",
"cusparse",
"nvjitlink",
]

# When exec_prefix differs from base_exec_prefix (e.g. a virtual
# environment), also consider the base interpreter's Library\bin.
if sys.exec_prefix != sys.base_exec_prefix:
base_py_dll_path = os.path.join(
sys.base_exec_prefix, 'Library', 'bin'
)
else:
base_py_dll_path = ''

# Keep only directories that exist; the '' placeholder is dropped here too.
dll_paths = list(
filter(
os.path.exists, [th_dll_path, py_dll_path, base_py_dll_path]
)
)
for site_cuda_package in site_cuda_list:
site_cuda_path = os.path.join(
site_cuda_base_path, site_cuda_package, 'bin'
)
if os.path.exists(site_cuda_path):
dll_paths.append(site_cuda_path)

import ctypes

kernel32 = ctypes.WinDLL('kernel32.dll', use_last_error=True)
# AddDllDirectory being present is used as the signal that LoadLibraryExW
# can be called with the LOAD_LIBRARY_SEARCH_* flags below.
with_load_library_flags = hasattr(kernel32, 'AddDllDirectory')
# 0x0001 is SEM_FAILCRITICALERRORS; the previous mode is restored at the end.
prev_error_mode = kernel32.SetErrorMode(0x0001)

# c_void_p restype makes a NULL module handle come back as None.
kernel32.LoadLibraryW.restype = ctypes.c_void_p
if with_load_library_flags:
kernel32.LoadLibraryExW.restype = ctypes.c_void_p

for dll_path in dll_paths:
os.add_dll_directory(dll_path)

# Probe for the MSVC runtime; a failure only prints a hint and continues.
try:
ctypes.CDLL('vcruntime140.dll')
ctypes.CDLL('msvcp140.dll')
ctypes.CDLL('vcruntime140_1.dll')
except OSError:
print(
'''Microsoft Visual C++ Redistributable is not installed, this may lead to the DLL load failure.
It can be downloaded at https://aka.ms/vs/16/release/vc_redist.x64.exe'''
)
import glob

# Collect every DLL from <package>\libs and from each existing nvidia\<pkg>\bin.
dlls = glob.glob(os.path.join(th_dll_path, '*.dll'))
for site_cuda_package in site_cuda_list:
site_cuda_path = os.path.join(
site_cuda_base_path, site_cuda_package, 'bin'
)
if os.path.exists(site_cuda_path):
dlls.extend(
glob.glob(os.path.join(site_cuda_path, '*.dll'))
)
# Not load 32 bit dlls in 64 bit python.
# NOTE(review): the test is a substring match on the full path, so a directory
# name containing '32_' would exclude every DLL below it - confirm intent.
dlls = [dll for dll in dlls if '32_' not in dll]
path_patched = False
for dll in dlls:
is_loaded = False
# NOTE(review): looks like leftover debug output; it prints every DLL path
# on import - confirm whether it should be removed.
print("dll:", dll)
if with_load_library_flags:
# 0x00001100 = LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR.
res = kernel32.LoadLibraryExW(dll, None, 0x00001100)
last_error = ctypes.get_last_error()
# Error 126 is ERROR_MOD_NOT_FOUND: do not raise, fall through to the
# PATH-based LoadLibraryW attempt below. Any other failure is raised.
if res is None and last_error != 126:
err = ctypes.WinError(last_error)
err.strerror += f' Error loading "{dll}" or one of its dependencies.'
raise err
elif res is not None:
is_loaded = True
if not is_loaded:
# Fallback: prepend the collected directories to PATH (once) and retry.
if not path_patched:
prev_path = os.environ['PATH']
os.environ['PATH'] = ';'.join(
dll_paths + [os.environ['PATH']]
)
path_patched = True
res = kernel32.LoadLibraryW(dll)
# NOTE(review): path_patched is never reset after PATH is restored here.
# Depending on the nesting (not visible in this view), later fallback loads
# may run without the patched PATH - confirm.
if path_patched:
os.environ['PATH'] = prev_path
if res is None:
err = ctypes.WinError(ctypes.get_last_error())
err.strerror += f' Error loading "{dll}" or one of its dependencies.'
raise err
kernel32.SetErrorMode(prev_error_mode)

disable_static()

Expand Down
Loading