Skip to content

Commit 025f162

Browse files
committed
load_all_dll_once
1 parent 1048206 commit 025f162

7 files changed

Lines changed: 338 additions & 42 deletions

File tree

CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,12 @@ if(WITH_GPU AND WITH_ROCM)
9999
endif()
100100

101101
if(WITH_GPU AND NOT APPLE)
102+
if(WIN32)
103+
add_definitions(-DWITH_PIP_CUDA_LIBRARIES)
104+
set(WITH_PIP_CUDA_LIBRARIES
105+
ON
106+
CACHE BOOL "" FORCE)
107+
endif()
102108
#(Note risemeup1): The cudart dynamic library libcudart.so is used by set CUDA_USE_STATIC_CUDA_RUNTIME and CMAKE_CUDA_FLAGS
103109
if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL
104110
"x86_64")

paddle/common/flags.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1713,3 +1713,7 @@ PHI_DEFINE_EXPORTED_string(cusolver_dir, // NOLINT
17131713
PHI_DEFINE_EXPORTED_string(cusparse_dir, // NOLINT
17141714
"",
17151715
"Specify path for loading libcusparse.so.*.");
1716+
PHI_DEFINE_EXPORTED_string(
1717+
win_cuda_bin_dir, // NOLINT
1718+
"",
1719+
"Specify path for loading *.dll about cuda on windows");

paddle/phi/backends/dynload/dynamic_loader.cc

Lines changed: 103 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ limitations under the License. */
1414
#include "paddle/phi/backends/dynload/dynamic_loader.h"
1515
#include <dirent.h>
1616

17+
#include <codecvt>
1718
#include <cstdlib>
1819
#include <string>
1920
#include <vector>
@@ -45,6 +46,7 @@ COMMON_DECLARE_string(cusparselt_dir);
4546
COMMON_DECLARE_string(curand_dir);
4647
COMMON_DECLARE_string(cusolver_dir);
4748
COMMON_DECLARE_string(cusparse_dir);
49+
COMMON_DECLARE_string(win_cuda_bin_dir);
4850
#ifdef PADDLE_WITH_HIP
4951

5052
PHI_DEFINE_string(miopen_dir,
@@ -132,8 +134,12 @@ static constexpr char* win_cufft_lib =
132134

133135
static inline std::string join(const std::string& part1,
134136
const std::string& part2) {
135-
// directory separator
137+
// directory separator
138+
#if defined(_WIN32)
139+
const char sep = '\\';
140+
#else
136141
const char sep = '/';
142+
#endif
137143
if (!part2.empty() && part2.front() == sep) {
138144
return part2;
139145
}
@@ -263,6 +269,26 @@ static inline void* GetDsoHandleFromSearchPath(
263269
#else
264270
int dynload_flags = 0;
265271
#endif // !_WIN32
272+
#if defined(_WIN32)
  // Register the "bin" directory of every pip-installed NVIDIA package that
  // lives under FLAGS_win_cuda_bin_dir (<root>\<package>\bin) as a DLL search
  // directory, so that a subsequent library load can resolve the CUDA DLLs and
  // their dependencies.
  //
  // Nothing is registered while the flag is empty: the concatenation would
  // otherwise yield root-relative paths such as "\cublas\bin", whereas
  // AddDllDirectory expects an absolute directory path.
  if (!FLAGS_win_cuda_bin_dir.empty()) {
    static constexpr const wchar_t* kCudaBinPackages[] = {
        L"cublas",
        L"cuda_nvrtc",
        L"cuda_runtime",
        L"cudnn",
        L"cufft",
        L"curand",
        L"cusolver",
        L"cusparse",
        L"nvjitlink",
    };
    // The flag is a narrow string; convert it to UTF-16 once, outside the
    // loop, because the Win32 API below takes a wide-character path.
    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
    const std::wstring cuda_root =
        converter.from_bytes(FLAGS_win_cuda_bin_dir);
    for (const wchar_t* package : kCudaBinPackages) {
      const std::wstring bin_dir = cuda_root + L"\\" + package + L"\\bin";
      // Best effort: the return value is deliberately ignored, as in the
      // original code, since not every package has to be installed.
      AddDllDirectory(bin_dir.c_str());
    }
  }
#endif
266292
std::vector<std::string> dso_names = split(dso_name, ";");
267293
void* dso_handle = nullptr;
268294
for (auto const& dso : dso_names) {
@@ -324,8 +350,26 @@ void* GetCublasDsoHandle() {
324350
#if defined(__APPLE__) || defined(__OSX__)
325351
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcublas.dylib");
326352
#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
327-
return GetDsoHandleFromSearchPath(
328-
FLAGS_cuda_dir, win_cublas_lib, true, {cuda_lib_path});
353+
if (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) {
354+
#ifdef WITH_PIP_CUDA_LIBRARIES
355+
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "cublas64_11.dll");
356+
#else
357+
return GetDsoHandleFromSearchPath(
358+
FLAGS_cuda_dir, win_cublas_lib, true, {cuda_lib_path});
359+
#endif
360+
} else if (CUDA_VERSION >= 12000 && CUDA_VERSION < 12030) {
361+
#ifdef WITH_PIP_CUDA_LIBRARIES
362+
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "cublas64_12.dll");
363+
#else
364+
return GetDsoHandleFromSearchPath(
365+
FLAGS_cuda_dir, win_cublas_lib, true, {cuda_lib_path});
366+
#endif
367+
} else {
368+
std::string warning_msg(
369+
"Your CUDA_VERSION is less than 11 or greater than 12, paddle "
370+
"temporarily no longer supports");
371+
return nullptr;
372+
}
329373
#elif defined(__linux__) && defined(PADDLE_WITH_CUDA)
330374
if (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) {
331375
#ifdef WITH_PIP_CUDA_LIBRARIES
@@ -403,8 +447,13 @@ void* GetCUDNNDsoHandle() {
403447
"Toolkit\\CUDA\\v10.0\n"
404448
"You should do this according to your CUDA installation directory and "
405449
"CUDNN version.");
450+
#ifdef WITH_PIP_CUDA_LIBRARIES
451+
return GetDsoHandleFromSearchPath(
452+
FLAGS_cuda_dir, "cudnn64_8.dll", true, {cuda_lib_path}, win_warn_meg);
453+
#else
406454
return GetDsoHandleFromSearchPath(
407-
FLAGS_cudnn_dir, win_cudnn_lib, true, {cuda_lib_path}, win_warn_meg);
455+
FLAGS_cuda_dir, win_cudnn_lib, true, {cuda_lib_path}, win_warn_meg);
456+
#endif
408457
#elif defined(PADDLE_WITH_HIP)
409458
return GetDsoHandleFromSearchPath(FLAGS_miopen_dir, "libMIOpen.so", false);
410459
#else
@@ -461,8 +510,13 @@ void* GetCurandDsoHandle() {
461510
#if defined(__APPLE__) || defined(__OSX__)
462511
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcurand.dylib");
463512
#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
513+
#ifdef WITH_PIP_CUDA_LIBRARIES
514+
return GetDsoHandleFromSearchPath(
515+
FLAGS_cuda_dir, "curand64_10.dll", true, {cuda_lib_path});
516+
#else
464517
return GetDsoHandleFromSearchPath(
465518
FLAGS_cuda_dir, win_curand_lib, true, {cuda_lib_path});
519+
#endif
466520
#elif defined(PADDLE_WITH_HIP)
467521
return GetDsoHandleFromSearchPath(FLAGS_rocm_dir, "libhiprand.so");
468522
#else
@@ -500,8 +554,13 @@ void* GetCusolverDsoHandle() {
500554
#if defined(__APPLE__) || defined(__OSX__)
501555
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcusolver.dylib");
502556
#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
557+
#ifdef WITH_PIP_CUDA_LIBRARIES
558+
return GetDsoHandleFromSearchPath(
559+
FLAGS_cuda_dir, "cusolver64_11.dll", true, {cuda_lib_path});
560+
#else
503561
return GetDsoHandleFromSearchPath(
504562
FLAGS_cuda_dir, win_cusolver_lib, true, {cuda_lib_path});
563+
#endif
505564
#else
506565
#ifdef WITH_PIP_CUDA_LIBRARIES
507566
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcusolver.so.11");
@@ -515,8 +574,26 @@ void* GetCusparseDsoHandle() {
515574
#if defined(__APPLE__) || defined(__OSX__)
516575
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcusparse.dylib");
517576
#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
518-
return GetDsoHandleFromSearchPath(
519-
FLAGS_cuda_dir, win_cusparse_lib, true, {cuda_lib_path});
577+
if (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) {
578+
#ifdef WITH_PIP_CUDA_LIBRARIES
579+
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "cusparse64_11.dll");
580+
#else
581+
return GetDsoHandleFromSearchPath(
582+
FLAGS_cuda_dir, win_cusparse_lib, true, {cuda_lib_path});
583+
#endif
584+
} else if (CUDA_VERSION >= 12000 && CUDA_VERSION < 12030) {
585+
#ifdef WITH_PIP_CUDA_LIBRARIES
586+
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "cusparse64_12.dll");
587+
#else
588+
return GetDsoHandleFromSearchPath(
589+
FLAGS_cuda_dir, win_cusparse_lib, true, {cuda_lib_path});
590+
#endif
591+
} else {
592+
std::string warning_msg(
593+
"Your CUDA_VERSION is less than 11 or greater than 12, paddle "
594+
"temporarily no longer supports");
595+
return nullptr;
596+
}
520597
#elif defined(__linux__) && defined(PADDLE_WITH_CUDA)
521598
if (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) {
522599
#ifdef WITH_PIP_CUDA_LIBRARIES
@@ -709,8 +786,26 @@ void* GetCUFFTDsoHandle() {
709786
return nullptr;
710787
}
711788
#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
712-
return GetDsoHandleFromSearchPath(
713-
FLAGS_cuda_dir, win_cufft_lib, true, {cuda_lib_path});
789+
if (CUDA_VERSION >= 11000 && CUDA_VERSION < 12000) {
790+
#ifdef WITH_PIP_CUDA_LIBRARIES
791+
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "cufft64_10.dll");
792+
#else
793+
return GetDsoHandleFromSearchPath(
794+
FLAGS_cuda_dir, win_cufft_lib, true, {cuda_lib_path});
795+
#endif
796+
} else if (CUDA_VERSION >= 12000 && CUDA_VERSION < 12030) {
797+
#ifdef WITH_PIP_CUDA_LIBRARIES
798+
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "cufft64_11.dll");
799+
#else
800+
return GetDsoHandleFromSearchPath(
801+
FLAGS_cuda_dir, win_cufft_lib, true, {cuda_lib_path});
802+
#endif
803+
} else {
804+
std::string warning_msg(
805+
"Your CUDA_VERSION is less than 11 or greater than 12, paddle "
806+
"temporarily no longer supports");
807+
return nullptr;
808+
}
714809
#else
715810
return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcufft.so");
716811
#endif

paddle/phi/common/port.cc

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
#include <memory>
1919
#include <stdexcept>
2020
#include <string>
21-
2221
#include "glog/logging.h"
2322

2423
#if !defined(_WIN32)
@@ -42,7 +41,14 @@ void *dlsym(void *handle, const char *symbol_name) {
4241

4342
void *dlopen(const char *filename, int flag) {
4443
std::string file_name(filename);
45-
HMODULE hModule = LoadLibrary(file_name.c_str());
44+
HMODULE hModule = nullptr;
45+
#ifdef WITH_PIP_CUDA_LIBRARIES
46+
hModule =
47+
LoadLibraryEx(file_name.c_str(), NULL, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS);
48+
#endif
49+
if (!hModule) {
50+
hModule = LoadLibrary(file_name.c_str());
51+
}
4652
if (!hModule) {
4753
if (flag) {
4854
throw std::runtime_error(file_name + " not found.");
@@ -72,7 +78,7 @@ int gettimeofday(struct timeval *tp, void *tzp) {
7278

7379
return (0);
7480
}
75-
#endif // !_WIN32
81+
#endif // !_WIN32
7682

7783
void ExecShellCommand(const std::string &cmd, std::string *message) {
7884
std::array<char, 128> buffer;

python/paddle/__init__.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,7 @@
570570
if os.path.exists(cuh_file):
571571
os.environ.setdefault('runtime_include_dir', runtime_include_dir)
572572

573+
573574
if is_compiled_with_cuda():
574575
import os
575576
import platform
@@ -601,6 +602,138 @@
601602
cupti_dir_lib_path = package_dir + "/.." + "/nvidia/cuda_cupti/lib"
602603
set_flags({"FLAGS_cupti_dir": cupti_dir_lib_path})
603604

605+
elif (
606+
platform.system() == 'Windows'
607+
and platform.machine() in ('x86_64', 'AMD64')
608+
and paddle.version.with_pip_cuda_libraries == 'ON'
609+
):
610+
package_dir = os.path.dirname(os.path.abspath(__file__))
611+
win_cuda_bin_path = package_dir + "\\.." + "\\nvidia"
612+
set_flags({"FLAGS_win_cuda_bin_dir": win_cuda_bin_path})
613+
614+
import sys
615+
616+
if sys.platform == 'win32':
    # Pre-load the DLLs shipped with the package and with the pip-installed
    # NVIDIA packages, so that the native extension can resolve its
    # dependencies on Windows.
    pfiles_path = os.getenv('ProgramFiles', 'C:\\Program Files')
    py_dll_path = os.path.join(sys.exec_prefix, 'Library', 'bin')
    # Work from an absolute, normalised package directory: the Win32 DLL
    # directory APIs expect absolute paths, and the NVIDIA base path below
    # contains a '..' component.
    th_dll_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'libs'
    )
    sit_cuda_base_path = os.path.abspath(
        os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'nvidia')
    )
    site_cuda_list = [
        "cublas",
        "cuda_nvrtc",
        "cuda_runtime",
        "cudnn",
        "cufft",
        "curand",
        "cusolver",
        "cusparse",
        "nvjitlink",
    ]

    # Inside a virtual environment, also consider the base interpreter's
    # Library\bin directory.
    if sys.exec_prefix != sys.base_exec_prefix:
        base_py_dll_path = os.path.join(
            sys.base_exec_prefix, 'Library', 'bin'
        )
    else:
        base_py_dll_path = ''

    dll_paths = list(
        filter(os.path.exists, [th_dll_path, py_dll_path, base_py_dll_path])
    )

    # "bin" directories of the NVIDIA packages that are actually installed.
    site_cuda_bin_paths = [
        bin_path
        for bin_path in (
            os.path.join(sit_cuda_base_path, sit_cuda_package, 'bin')
            for sit_cuda_package in site_cuda_list
        )
        if os.path.exists(bin_path)
    ]
    dll_paths.extend(site_cuda_bin_paths)

    # System-wide CUDA toolkit: use CUDA_PATH_V<major>_<minor> when it is set
    # and fall back to the default installation directory otherwise.
    # (Without the fallback, an unset variable made os.path.join raise
    # TypeError on None.)
    cuda_version = paddle.version.cuda_version
    cuda_path_var = 'CUDA_PATH_V' + cuda_version.replace('.', '_')
    default_path = os.path.join(
        pfiles_path,
        'NVIDIA GPU Computing Toolkit',
        'CUDA',
        'v' + cuda_version,
    )
    cuda_path = os.path.join(os.getenv(cuda_path_var, default_path), 'bin')

    dll_paths.extend(filter(os.path.exists, [cuda_path]))

    import ctypes
    import glob

    kernel32 = ctypes.WinDLL('kernel32.dll', use_last_error=True)
    with_load_library_flags = hasattr(kernel32, 'AddDllDirectory')
    # 0x0001 == SEM_FAILCRITICALERRORS. The previous mode is restored in the
    # `finally` clause, including when a load failure is raised.
    prev_error_mode = kernel32.SetErrorMode(0x0001)
    try:
        # With a c_void_p restype a NULL module handle is returned as None.
        kernel32.LoadLibraryW.restype = ctypes.c_void_p
        if with_load_library_flags:
            kernel32.LoadLibraryExW.restype = ctypes.c_void_p

        for dll_path in dll_paths:
            os.add_dll_directory(dll_path)

        try:
            ctypes.CDLL('vcruntime140.dll')
            ctypes.CDLL('msvcp140.dll')
            ctypes.CDLL('vcruntime140_1.dll')
        except OSError:
            print(
                '''Microsoft Visual C++ Redistributable is not installed, this may lead to the DLL load failure.
It can be downloaded at https://aka.ms/vs/16/release/vc_redist.x64.exe'''
            )

        dlls = glob.glob(os.path.join(th_dll_path, '*.dll'))
        for site_cuda_path in site_cuda_bin_paths:
            dlls.extend(glob.glob(os.path.join(site_cuda_path, '*.dll')))

        path_patched = False
        for dll in dlls:
            is_loaded = False
            if with_load_library_flags:
                # Call the Win32 API directly: ctypes.cdll.LoadLibrary raises
                # OSError instead of returning None, which would make the
                # error handling and the PATH fallback below unreachable.
                # 0x00001100 == LOAD_LIBRARY_SEARCH_DEFAULT_DIRS
                #             | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR
                res = kernel32.LoadLibraryExW(dll, None, 0x00001100)
                last_error = ctypes.get_last_error()
                # 126 == ERROR_MOD_NOT_FOUND: tolerated here, the PATH-based
                # fallback below gets a chance to resolve the module.
                if res is None and last_error != 126:
                    err = ctypes.WinError(last_error)
                    err.strerror += f' Error loading "{dll}" or one of its dependencies.'
                    raise err
                elif res is not None:
                    is_loaded = True
            if not is_loaded:
                if not path_patched:
                    # Prepend the DLL directories to PATH once, for the
                    # legacy search order used by LoadLibraryW.
                    os.environ['PATH'] = ';'.join(
                        dll_paths + [os.environ['PATH']]
                    )
                    path_patched = True
                res = kernel32.LoadLibraryW(dll)
                if res is None:
                    err = ctypes.WinError(ctypes.get_last_error())
                    err.strerror += f' Error loading "{dll}" or one of its dependencies.'
                    raise err
    finally:
        kernel32.SetErrorMode(prev_error_mode)
604737

605738
disable_static()
606739

0 commit comments

Comments
 (0)