@@ -48,121 +48,93 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
4848 * include all needed cudnn functions in HPPL
4949 * different cudnn version has different interfaces
5050 **/
51- #define CUDNN_DNN_ROUTINE_EACH (__macro ) \
52- __macro (cudnnSetTensor4dDescriptor); \
53- __macro (cudnnSetTensor4dDescriptorEx); \
54- __macro (cudnnSetTensorNdDescriptor); \
55- __macro (cudnnGetTensorNdDescriptor); \
56- __macro (cudnnGetConvolutionNdForwardOutputDim); \
57- __macro (cudnnCreateTensorDescriptor); \
58- __macro (cudnnDestroyTensorDescriptor); \
59- __macro (cudnnCreateFilterDescriptor); \
60- __macro (cudnnSetFilter4dDescriptor); \
61- __macro (cudnnSetFilterNdDescriptor); \
62- __macro (cudnnGetFilterNdDescriptor); \
63- __macro (cudnnSetPooling2dDescriptor); \
64- __macro (cudnnSetPoolingNdDescriptor); \
65- __macro (cudnnGetPoolingNdDescriptor); \
66- __macro (cudnnDestroyFilterDescriptor); \
67- __macro (cudnnCreateConvolutionDescriptor); \
68- __macro (cudnnCreatePoolingDescriptor); \
69- __macro (cudnnDestroyPoolingDescriptor); \
70- __macro (cudnnSetConvolution2dDescriptor); \
71- __macro (cudnnDestroyConvolutionDescriptor); \
72- __macro (cudnnSetConvolutionNdDescriptor); \
73- __macro (cudnnGetConvolutionNdDescriptor); \
74- __macro (cudnnDeriveBNTensorDescriptor); \
75- __macro (cudnnCreateSpatialTransformerDescriptor); \
76- __macro (cudnnSetSpatialTransformerNdDescriptor); \
77- __macro (cudnnDestroySpatialTransformerDescriptor); \
78- __macro (cudnnSpatialTfGridGeneratorForward); \
79- __macro (cudnnSpatialTfGridGeneratorBackward); \
80- __macro (cudnnSpatialTfSamplerForward); \
81- __macro (cudnnSpatialTfSamplerBackward); \
82- __macro (cudnnCreate); \
83- __macro (cudnnDestroy); \
84- __macro (cudnnSetStream); \
85- __macro (cudnnActivationForward); \
86- __macro (cudnnActivationBackward); \
87- __macro (cudnnConvolutionForward); \
88- __macro (cudnnConvolutionBackwardBias); \
89- __macro (cudnnGetConvolutionForwardWorkspaceSize); \
90- __macro (cudnnTransformTensor); \
91- __macro (cudnnPoolingForward); \
92- __macro (cudnnPoolingBackward); \
93- __macro (cudnnSoftmaxBackward); \
94- __macro (cudnnSoftmaxForward); \
95- __macro (cudnnGetVersion); \
96- __macro (cudnnFindConvolutionForwardAlgorithmEx); \
97- __macro (cudnnFindConvolutionBackwardFilterAlgorithmEx); \
98- __macro (cudnnFindConvolutionBackwardFilterAlgorithm); \
99- __macro (cudnnFindConvolutionBackwardDataAlgorithmEx); \
100- __macro (cudnnGetErrorString); \
101- __macro (cudnnCreateDropoutDescriptor); \
102- __macro (cudnnDropoutGetStatesSize); \
103- __macro (cudnnSetDropoutDescriptor); \
104- __macro (cudnnRestoreDropoutDescriptor); \
105- __macro (cudnnCreateRNNDescriptor); \
106- __macro (cudnnGetRNNParamsSize); \
107- __macro (cudnnGetRNNWorkspaceSize); \
108- __macro (cudnnGetRNNTrainingReserveSize); \
109- __macro (cudnnRNNForwardTraining); \
110- __macro (cudnnRNNBackwardData); \
111- __macro (cudnnRNNBackwardWeights); \
112- __macro (cudnnRNNForwardInference); \
113- __macro (cudnnDestroyDropoutDescriptor); \
114- __macro (cudnnDestroyRNNDescriptor); \
115- __macro (cudnnSetTensorNdDescriptorEx);
116-
117- CUDNN_DNN_ROUTINE_EACH (DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
118-
119- #define CUDNN_DNN_ROUTINE_EACH_R2 (__macro ) \
120- __macro (cudnnAddTensor); \
121- __macro (cudnnConvolutionBackwardData); \
122- __macro (cudnnConvolutionBackwardFilter);
123- CUDNN_DNN_ROUTINE_EACH_R2 (DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
124-
125- // APIs available after R3:
126- #if CUDNN_VERSION >= 3000
127- #define CUDNN_DNN_ROUTINE_EACH_AFTER_R3 (__macro ) \
51+ #define CUDNN_DNN_ROUTINE_EACH (__macro ) \
52+ __macro (cudnnSetTensor4dDescriptor); \
53+ __macro (cudnnSetTensor4dDescriptorEx); \
54+ __macro (cudnnSetTensorNdDescriptor); \
55+ __macro (cudnnGetTensorNdDescriptor); \
56+ __macro (cudnnGetConvolutionNdForwardOutputDim); \
57+ __macro (cudnnCreateTensorDescriptor); \
58+ __macro (cudnnDestroyTensorDescriptor); \
59+ __macro (cudnnCreateFilterDescriptor); \
60+ __macro (cudnnSetFilter4dDescriptor); \
61+ __macro (cudnnSetFilterNdDescriptor); \
62+ __macro (cudnnGetFilterNdDescriptor); \
63+ __macro (cudnnSetPooling2dDescriptor); \
64+ __macro (cudnnSetPoolingNdDescriptor); \
65+ __macro (cudnnGetPoolingNdDescriptor); \
66+ __macro (cudnnDestroyFilterDescriptor); \
67+ __macro (cudnnCreateConvolutionDescriptor); \
68+ __macro (cudnnCreatePoolingDescriptor); \
69+ __macro (cudnnDestroyPoolingDescriptor); \
70+ __macro (cudnnSetConvolution2dDescriptor); \
71+ __macro (cudnnDestroyConvolutionDescriptor); \
72+ __macro (cudnnSetConvolutionNdDescriptor); \
73+ __macro (cudnnGetConvolutionNdDescriptor); \
74+ __macro (cudnnDeriveBNTensorDescriptor); \
75+ __macro (cudnnCreateSpatialTransformerDescriptor); \
76+ __macro (cudnnSetSpatialTransformerNdDescriptor); \
77+ __macro (cudnnDestroySpatialTransformerDescriptor); \
78+ __macro (cudnnSpatialTfGridGeneratorForward); \
79+ __macro (cudnnSpatialTfGridGeneratorBackward); \
80+ __macro (cudnnSpatialTfSamplerForward); \
81+ __macro (cudnnSpatialTfSamplerBackward); \
82+ __macro (cudnnCreate); \
83+ __macro (cudnnDestroy); \
84+ __macro (cudnnSetStream); \
85+ __macro (cudnnActivationForward); \
86+ __macro (cudnnActivationBackward); \
87+ __macro (cudnnConvolutionForward); \
88+ __macro (cudnnConvolutionBackwardBias); \
89+ __macro (cudnnGetConvolutionForwardWorkspaceSize); \
90+ __macro (cudnnTransformTensor); \
91+ __macro (cudnnPoolingForward); \
92+ __macro (cudnnPoolingBackward); \
93+ __macro (cudnnSoftmaxBackward); \
94+ __macro (cudnnSoftmaxForward); \
95+ __macro (cudnnGetVersion); \
96+ __macro (cudnnFindConvolutionForwardAlgorithmEx); \
97+ __macro (cudnnFindConvolutionBackwardFilterAlgorithmEx); \
98+ __macro (cudnnFindConvolutionBackwardFilterAlgorithm); \
99+ __macro (cudnnFindConvolutionBackwardDataAlgorithmEx); \
100+ __macro (cudnnGetErrorString); \
101+ __macro (cudnnCreateDropoutDescriptor); \
102+ __macro (cudnnDropoutGetStatesSize); \
103+ __macro (cudnnSetDropoutDescriptor); \
104+ __macro (cudnnRestoreDropoutDescriptor); \
105+ __macro (cudnnCreateRNNDescriptor); \
106+ __macro (cudnnGetRNNParamsSize); \
107+ __macro (cudnnGetRNNWorkspaceSize); \
108+ __macro (cudnnGetRNNTrainingReserveSize); \
109+ __macro (cudnnRNNForwardTraining); \
110+ __macro (cudnnRNNBackwardData); \
111+ __macro (cudnnRNNBackwardWeights); \
112+ __macro (cudnnRNNForwardInference); \
113+ __macro (cudnnDestroyDropoutDescriptor); \
114+ __macro (cudnnDestroyRNNDescriptor); \
115+ __macro (cudnnSetTensorNdDescriptorEx); \
116+ __macro (cudnnAddTensor); \
117+ __macro (cudnnConvolutionBackwardData); \
118+ __macro (cudnnConvolutionBackwardFilter); \
128119 __macro (cudnnGetConvolutionBackwardFilterWorkspaceSize); \
129- __macro (cudnnGetConvolutionBackwardDataWorkspaceSize);
130- CUDNN_DNN_ROUTINE_EACH_AFTER_R3 (DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
131- #endif
120+ __macro (cudnnGetConvolutionBackwardDataWorkspaceSize); \
121+ __macro (cudnnBatchNormalizationForwardTraining); \
122+ __macro (cudnnBatchNormalizationForwardInference); \
123+ __macro (cudnnBatchNormalizationBackward); \
124+ __macro (cudnnCreateActivationDescriptor); \
125+ __macro (cudnnSetActivationDescriptor); \
126+ __macro (cudnnGetActivationDescriptor); \
127+ __macro (cudnnDestroyActivationDescriptor); \
128+ __macro (cudnnSetRNNDescriptor_v6);
129+ CUDNN_DNN_ROUTINE_EACH (DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
132130
133- // APIs available after R3:
134- #if CUDNN_VERSION >= 3000 && CUDNN_VERSION < 8000
135- #define CUDNN_DNN_ROUTINE_EACH_AFTER_R3_LESS_R8 (__macro ) \
131+ #if CUDNN_VERSION >= 7000 && CUDNN_VERSION < 8000
132+ #define CUDNN_DNN_ROUTINE_EACH_AFTER_R7_LESS_R8 (__macro ) \
136133 __macro (cudnnGetConvolutionBackwardFilterAlgorithm); \
137134 __macro (cudnnGetConvolutionForwardAlgorithm); \
138135 __macro (cudnnGetConvolutionBackwardDataAlgorithm); \
139136 __macro (cudnnSetRNNDescriptor);
140- CUDNN_DNN_ROUTINE_EACH_AFTER_R3_LESS_R8 (DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
141- #endif
142-
143- // APIs available after R4:
144- #if CUDNN_VERSION >= 4007
145- #define CUDNN_DNN_ROUTINE_EACH_AFTER_R4 (__macro ) \
146- __macro (cudnnBatchNormalizationForwardTraining); \
147- __macro (cudnnBatchNormalizationForwardInference); \
148- __macro (cudnnBatchNormalizationBackward);
149- CUDNN_DNN_ROUTINE_EACH_AFTER_R4 (DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
150- #endif
151-
152- // APIs in R5
153- #if CUDNN_VERSION >= 5000
154- #define CUDNN_DNN_ROUTINE_EACH_R5 (__macro ) \
155- __macro (cudnnCreateActivationDescriptor); \
156- __macro (cudnnSetActivationDescriptor); \
157- __macro (cudnnGetActivationDescriptor); \
158- __macro (cudnnDestroyActivationDescriptor);
159- CUDNN_DNN_ROUTINE_EACH_R5 (DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
160- #endif
161-
162- // APIs in R6
163- #if CUDNN_VERSION >= 6000
164- #define CUDNN_DNN_ROUTINE_EACH_R6 (__macro ) __macro(cudnnSetRNNDescriptor_v6);
165- CUDNN_DNN_ROUTINE_EACH_R6 (DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
137+ CUDNN_DNN_ROUTINE_EACH_AFTER_R7_LESS_R8 (DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
166138#endif
167139
168140#if CUDNN_VERSION >= 7001
0 commit comments