From 1dca5e0961cd38f33fa31db9d963673a49551233 Mon Sep 17 00:00:00 2001
From: ccsuzzh <1719571694@qq.com>
Date: Sat, 27 Jan 2024 16:17:46 +0800
Subject: [PATCH 01/20] replace cc_test with paddle_test

---
 test/cpp/fluid/fused/CMakeLists.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/cpp/fluid/fused/CMakeLists.txt b/test/cpp/fluid/fused/CMakeLists.txt
index 3f3ebc73a796dd..685bfc96f795f2 100644
--- a/test/cpp/fluid/fused/CMakeLists.txt
+++ b/test/cpp/fluid/fused/CMakeLists.txt
@@ -1,7 +1,7 @@
 if(WITH_GPU OR WITH_ROCM)
   # fusion_group
   if(NOT APPLE AND NOT WIN32)
-    cc_test(
+    paddle_test(
       test_fusion_group_op
       SRCS fusion_group_op_test.cc
       DEPS fusion_group_op)
@@ -42,7 +42,7 @@ if(WITH_GPU OR WITH_ROCM)
   endif()
   # resnet_unit needs cudnn 8.0 above
   if((NOT WITH_ROCM) AND (NOT ${CUDNN_VERSION} VERSION_LESS 8000))
-    cc_test(
+    paddle_test(
       test_cudnn_norm_conv
       SRCS cudnn_norm_conv_test.cc
       DEPS generated_op
@@ -52,7 +52,7 @@ if(WITH_GPU OR WITH_ROCM)
            device_context
            phi
            common)
-    cc_test(
+    paddle_test(
       test_cudnn_bn_add_relu
       SRCS cudnn_bn_add_relu_test.cc
       DEPS batch_norm_op

From 72b91eb2ca9b0cea0381aff7a5236f994220c899 Mon Sep 17 00:00:00 2001
From: ccsuzzh <1719571694@qq.com>
Date: Sat, 27 Jan 2024 16:59:54 +0800
Subject: [PATCH 02/20] fix codestyle

---
 test/cpp/fluid/fused/CMakeLists.txt | 42 +++++++++++++++--------------
 1 file changed, 22 insertions(+), 20 deletions(-)

diff --git a/test/cpp/fluid/fused/CMakeLists.txt b/test/cpp/fluid/fused/CMakeLists.txt
index 685bfc96f795f2..e50cf5f193bcf2 100644
--- a/test/cpp/fluid/fused/CMakeLists.txt
+++ b/test/cpp/fluid/fused/CMakeLists.txt
@@ -1,10 +1,8 @@
 if(WITH_GPU OR WITH_ROCM)
   # fusion_group
   if(NOT APPLE AND NOT WIN32)
-    paddle_test(
-      test_fusion_group_op
-      SRCS fusion_group_op_test.cc
-      DEPS fusion_group_op)
+    paddle_test(test_fusion_group_op SRCS fusion_group_op_test.cc DEPS
+                fusion_group_op)
   endif()
   if(NOT WITH_ROCM)
     nv_test(
@@ -44,23 +42,27 @@ if(WITH_GPU OR WITH_ROCM)
   if((NOT WITH_ROCM) AND (NOT ${CUDNN_VERSION} VERSION_LESS 8000))
     paddle_test(
       test_cudnn_norm_conv
-      SRCS cudnn_norm_conv_test.cc
-      DEPS generated_op
-           depthwise_conv
-           tensor
-           op_registry
-           device_context
-           phi
-           common)
+      SRCS
+      cudnn_norm_conv_test.cc
+      DEPS
+      generated_op
+      depthwise_conv
+      tensor
+      op_registry
+      device_context
+      phi
+      common)
     paddle_test(
       test_cudnn_bn_add_relu
-      SRCS cudnn_bn_add_relu_test.cc
-      DEPS batch_norm_op
-           fused_bn_add_activation_op
-           tensor
-           op_registry
-           device_context
-           phi
-           common)
+      SRCS
+      cudnn_bn_add_relu_test.cc
+      DEPS
+      batch_norm_op
+      fused_bn_add_activation_op
+      tensor
+      op_registry
+      device_context
+      phi
+      common)
   endif()
 endif()

From e6537caebe130814252b71bf9ce120a0cbf69450 Mon Sep 17 00:00:00 2001
From: ccsuzzh <1719571694@qq.com>
Date: Sun, 18 Feb 2024 11:17:47 +0800
Subject: [PATCH 03/20] fix bug: drop explicit DEPS from the paddle_test targets

---
 test/cpp/fluid/fused/CMakeLists.txt | 29 +++--------------------------
 1 file changed, 3 insertions(+), 26 deletions(-)

diff --git a/test/cpp/fluid/fused/CMakeLists.txt b/test/cpp/fluid/fused/CMakeLists.txt
index e50cf5f193bcf2..324987ede660d0 100644
--- a/test/cpp/fluid/fused/CMakeLists.txt
+++ b/test/cpp/fluid/fused/CMakeLists.txt
@@ -1,8 +1,7 @@
 if(WITH_GPU OR WITH_ROCM)
   # fusion_group
   if(NOT APPLE AND NOT WIN32)
-    paddle_test(test_fusion_group_op SRCS fusion_group_op_test.cc DEPS
-                fusion_group_op)
+    paddle_test(test_fusion_group_op SRCS fusion_group_op_test.cc)
   endif()
   if(NOT WITH_ROCM)
     nv_test(
@@ -40,29 +39,7 @@ if(WITH_GPU OR WITH_ROCM)
   endif()
   # resnet_unit needs cudnn 8.0 above
   if((NOT WITH_ROCM) AND (NOT ${CUDNN_VERSION} VERSION_LESS 8000))
-    paddle_test(
-      test_cudnn_norm_conv
-      SRCS
-      cudnn_norm_conv_test.cc
-      DEPS
-      generated_op
-      depthwise_conv
-      tensor
-      op_registry
-      device_context
-      phi
-      common)
-    paddle_test(
-      test_cudnn_bn_add_relu
-      SRCS
-      cudnn_bn_add_relu_test.cc
-      DEPS
-      batch_norm_op
-      fused_bn_add_activation_op
-      tensor
-      op_registry
-      device_context
-      phi
-      common)
+    paddle_test(test_cudnn_norm_conv SRCS cudnn_norm_conv_test.cc)
+    paddle_test(test_cudnn_bn_add_relu SRCS cudnn_bn_add_relu_test.cc)
   endif()
 endif()

From 58c2c8517873375c98e4fd35e16321367fdf0a9d Mon Sep 17 00:00:00 2001
From: ccsuzzh <1719571694@qq.com>
Date: Sat, 24 Feb 2024 19:55:37 +0800
Subject: [PATCH 04/20] fix unresolved external symbol

---
 paddle/fluid/platform/dynload/dynamic_loader.h | 3 ++-
 paddle/phi/backends/dynload/cudnn.h            | 6 +++---
 paddle/phi/backends/dynload/port.h             | 2 +-
 paddle/phi/core/device_context.h               | 6 +++---
 paddle/phi/core/enforce.h                      | 2 +-
 test/cpp/fluid/fused/cudnn_bn_add_relu_test.cc | 7 -------
 6 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/paddle/fluid/platform/dynload/dynamic_loader.h b/paddle/fluid/platform/dynload/dynamic_loader.h
index 93a19645a0a34e..7082b20f3ae675 100644
--- a/paddle/fluid/platform/dynload/dynamic_loader.h
+++ b/paddle/fluid/platform/dynload/dynamic_loader.h
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #pragma once
 #include <string>
+#include "paddle/utils/test_macros.h"
 
 namespace paddle {
 namespace platform {
@@ -27,7 +28,7 @@ namespace dynload {
 
 void* GetCublasDsoHandle();
 void* GetCublasLtDsoHandle();
-void* GetCUDNNDsoHandle();
+TEST_API void* GetCUDNNDsoHandle();
 void* GetCUPTIDsoHandle();
 void* GetCurandDsoHandle();
 void* GetNvjpegDsoHandle();
diff --git a/paddle/phi/backends/dynload/cudnn.h b/paddle/phi/backends/dynload/cudnn.h
index 3292beb0371107..72113826a3e54c 100644
--- a/paddle/phi/backends/dynload/cudnn.h
+++ b/paddle/phi/backends/dynload/cudnn.h
@@ -24,11 +24,11 @@ limitations under the License. */
 namespace phi {
 namespace dynload {
 
-extern std::once_flag cudnn_dso_flag;
-extern void* cudnn_dso_handle;
+TEST_API extern std::once_flag cudnn_dso_flag;
+TEST_API extern void* cudnn_dso_handle;
 extern bool HasCUDNN();
 
-extern void EnforceCUDNNLoaded(const char* fn_name);
+TEST_API extern void EnforceCUDNNLoaded(const char* fn_name);
 #define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name)                      \
   struct DynLoad__##__name {                                         \
     template <typename... Args>                                      \
diff --git a/paddle/phi/backends/dynload/port.h b/paddle/phi/backends/dynload/port.h
index 03a2863e4dc4ee..476eb20c8534b6 100644
--- a/paddle/phi/backends/dynload/port.h
+++ b/paddle/phi/backends/dynload/port.h
@@ -38,7 +38,7 @@
 #define S_ISDIR(mode) (((mode)&S_IFMT) == S_IFDIR)
 #endif  // S_ISDIR
 
-void *dlsym(void *handle, const char *symbol_name);
+TEST_API void *dlsym(void *handle, const char *symbol_name);
 
 void *dlopen(const char *filename, int flag);
 
diff --git a/paddle/phi/core/device_context.h b/paddle/phi/core/device_context.h
index e0d555fe01b3e0..f8ec9ad72840e6 100644
--- a/paddle/phi/core/device_context.h
+++ b/paddle/phi/core/device_context.h
@@ -152,9 +152,9 @@ class PADDLE_API DeviceContext {
                       bool fake_alloc = false) const;
 
   template <typename T>
-  T* Alloc(TensorBase* tensor,
-           size_t requested_size = 0,
-           bool pinned = false) const;
+  TEST_API T* Alloc(TensorBase* tensor,
+                    size_t requested_size = 0,
+                    bool pinned = false) const;
 
   /**
    * @brief Allocate host memory for tensor.
diff --git a/paddle/phi/core/enforce.h b/paddle/phi/core/enforce.h
index feb2852a9dc679..8ffaaf6d3fbf21 100644
--- a/paddle/phi/core/enforce.h
+++ b/paddle/phi/core/enforce.h
@@ -565,7 +565,7 @@ DEFINE_EXTERNAL_API_TYPE(ncclResult_t, ncclSuccess);
 }  // namespace details
 
 template <typename T>
-std::string GetExternalErrorMsg(T status);
+TEST_API std::string GetExternalErrorMsg(T status);
 
 /*************** CUDA ERROR ***************/
 inline bool is_error(cudaError_t e) { return e != cudaSuccess; }
diff --git a/test/cpp/fluid/fused/cudnn_bn_add_relu_test.cc b/test/cpp/fluid/fused/cudnn_bn_add_relu_test.cc
index ae4697833d7584..63f8f5532df31b 100644
--- a/test/cpp/fluid/fused/cudnn_bn_add_relu_test.cc
+++ b/test/cpp/fluid/fused/cudnn_bn_add_relu_test.cc
@@ -33,13 +33,6 @@ namespace framework = paddle::framework;
 namespace platform = paddle::platform;
 namespace op = paddle::operators;
 
-USE_OP_ITSELF(batch_norm);
-USE_OP_ITSELF(fused_bn_add_activation);
-USE_OP_ITSELF(fused_bn_add_activation_grad);
-PD_DECLARE_KERNEL(batch_norm, GPU, ALL_LAYOUT);
-PD_DECLARE_KERNEL(fused_bn_add_activation, GPU, ALL_LAYOUT);
-PD_DECLARE_KERNEL(fused_bn_add_activation_grad, GPU, ALL_LAYOUT);
-
 template <typename T>
 void InitRandomTensor(const std::vector<int64_t> &dims,
                       phi::DenseTensor *cpu_out) {

From d87ced3d7d4e7a71bd07823d6302007bad2d2851 Mon Sep 17 00:00:00 2001
From: ccsuzzh <1719571694@qq.com>
Date: Mon, 26 Feb 2024 20:58:08 +0800
Subject: [PATCH 05/20] fix bug: move TEST_API on GetCUDNNDsoHandle to phi dynamic_loader.h

---
 paddle/fluid/platform/dynload/dynamic_loader.h | 3 +--
 paddle/phi/backends/dynload/dynamic_loader.h   | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/paddle/fluid/platform/dynload/dynamic_loader.h b/paddle/fluid/platform/dynload/dynamic_loader.h
index 7082b20f3ae675..93a19645a0a34e 100644
--- a/paddle/fluid/platform/dynload/dynamic_loader.h
+++ b/paddle/fluid/platform/dynload/dynamic_loader.h
@@ -14,7 +14,6 @@ limitations under the License. */
 
 #pragma once
 #include <string>
-#include "paddle/utils/test_macros.h"
 
 namespace paddle {
 namespace platform {
@@ -28,7 +27,7 @@ namespace dynload {
 
 void* GetCublasDsoHandle();
 void* GetCublasLtDsoHandle();
-TEST_API void* GetCUDNNDsoHandle();
+void* GetCUDNNDsoHandle();
 void* GetCUPTIDsoHandle();
 void* GetCurandDsoHandle();
 void* GetNvjpegDsoHandle();
diff --git a/paddle/phi/backends/dynload/dynamic_loader.h b/paddle/phi/backends/dynload/dynamic_loader.h
index 6ddeb1386410f0..b71a8fe976cbb2 100644
--- a/paddle/phi/backends/dynload/dynamic_loader.h
+++ b/paddle/phi/backends/dynload/dynamic_loader.h
@@ -14,7 +14,7 @@ limitations under the License. */
 
 #pragma once
 #include <string>
-
+#include "paddle/utils/test_macros.h"
 namespace phi {
 namespace dynload {
 
@@ -26,7 +26,7 @@ namespace dynload {
 
 void* GetCublasDsoHandle();
 void* GetCublasLtDsoHandle();
-void* GetCUDNNDsoHandle();
+TEST_API void* GetCUDNNDsoHandle();
 void* GetCUPTIDsoHandle();
 void* GetCurandDsoHandle();
 void* GetNvjpegDsoHandle();

From 6e1bd9b482521f67906696e1cedb04e8d55dce1b Mon Sep 17 00:00:00 2001
From: ccsuzzh <1719571694@qq.com>
Date: Fri, 1 Mar 2024 17:51:21 +0800
Subject: [PATCH 06/20] fix bug: include test_macros.h in dynload/port.h

---
 paddle/phi/backends/dynload/port.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/paddle/phi/backends/dynload/port.h b/paddle/phi/backends/dynload/port.h
index 476eb20c8534b6..a56479e7a471ab 100644
--- a/paddle/phi/backends/dynload/port.h
+++ b/paddle/phi/backends/dynload/port.h
@@ -15,6 +15,7 @@
 #pragma once
 
 #include <string>
+#include "paddle/utils/test_macros.h"
 
 #define GLOG_NO_ABBREVIATED_SEVERITIES  // msvc conflict logging with windows.h
 

From f9824b6f1ed303fa8e53def1536fcf260d41e1fa Mon Sep 17 00:00:00 2001
From: ccsuzzh <1719571694@qq.com>
Date: Mon, 4 Mar 2024 19:21:29 +0800
Subject: [PATCH 07/20] fix bug: mark DnnWorkspaceHandle::ReallocWorkspace as TEST_API

---
 paddle/phi/backends/gpu/gpu_context.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/phi/backends/gpu/gpu_context.h b/paddle/phi/backends/gpu/gpu_context.h
index 8cd0d414bc105b..26905c31a0d52c 100644
--- a/paddle/phi/backends/gpu/gpu_context.h
+++ b/paddle/phi/backends/gpu/gpu_context.h
@@ -69,7 +69,7 @@ class DnnWorkspaceHandle {
 
   void ResetWorkspace();
 
-  void ReallocWorkspace(size_t required_workspace_bytes);
+  TEST_API void ReallocWorkspace(size_t required_workspace_bytes);
 
   DnnWorkspaceHandle(DnnWorkspaceHandle&&) = default;
   DnnWorkspaceHandle& operator=(DnnWorkspaceHandle&&) = delete;

From 378bc535ec809c6bb5ea4c248ed1c555abb2c1e9 Mon Sep 17 00:00:00 2001
From: ccsuzzh <1719571694@qq.com>
Date: Wed, 6 Mar 2024 20:39:53 +0800
Subject: [PATCH 08/20] fix bug: put TEST_API on GPU/pinned allocator Alloc, drop it from DeviceContext::Alloc

---
 paddle/fluid/memory/allocation/system_allocator.h | 5 +++--
 paddle/phi/core/device_context.h                  | 6 +++---
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/paddle/fluid/memory/allocation/system_allocator.h b/paddle/fluid/memory/allocation/system_allocator.h
index 67376a3e39a224..14fd7cc5fc8789 100644
--- a/paddle/fluid/memory/allocation/system_allocator.h
+++ b/paddle/fluid/memory/allocation/system_allocator.h
@@ -17,6 +17,7 @@ limitations under the License. */
 #include <stddef.h>  // for size_t
 
 #include <string>
+#include "paddle/utils/test_macros.h"
 
 namespace paddle {
 namespace memory {
@@ -48,7 +49,7 @@ class GPUAllocator : public SystemAllocator {
  public:
   explicit GPUAllocator(int gpu_id) : gpu_id_(gpu_id) {}
 
-  virtual void* Alloc(size_t* index, size_t size);
+  TEST_API virtual void* Alloc(size_t* index, size_t size);
   virtual void Free(void* p, size_t size, size_t index);
   virtual bool UseGpu() const;
 
@@ -59,7 +60,7 @@ class GPUAllocator : public SystemAllocator {
 
 class CUDAPinnedAllocator : public SystemAllocator {
  public:
-  virtual void* Alloc(size_t* index, size_t size);
+  TEST_API virtual void* Alloc(size_t* index, size_t size);
   virtual void Free(void* p, size_t size, size_t index);
   virtual bool UseGpu() const;
 
diff --git a/paddle/phi/core/device_context.h b/paddle/phi/core/device_context.h
index f8ec9ad72840e6..e0d555fe01b3e0 100644
--- a/paddle/phi/core/device_context.h
+++ b/paddle/phi/core/device_context.h
@@ -152,9 +152,9 @@ class PADDLE_API DeviceContext {
                       bool fake_alloc = false) const;
 
   template <typename T>
-  TEST_API T* Alloc(TensorBase* tensor,
-                    size_t requested_size = 0,
-                    bool pinned = false) const;
+  T* Alloc(TensorBase* tensor,
+           size_t requested_size = 0,
+           bool pinned = false) const;
 
   /**
    * @brief Allocate host memory for tensor.

From 8d9c0fb3a25731af7343ebbe093551a5e2749a80 Mon Sep 17 00:00:00 2001
From: ccsuzzh <1719571694@qq.com>
Date: Fri, 8 Mar 2024 21:15:29 +0800
Subject: [PATCH 09/20] fix bug: revert allocator TEST_API, mark DeviceContext::Alloc<T> TEST_API instead

---
 paddle/fluid/memory/allocation/system_allocator.h | 5 ++---
 paddle/phi/core/device_context.cc                 | 2 +-
 paddle/phi/core/device_context.h                  | 7 +++----
 3 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/paddle/fluid/memory/allocation/system_allocator.h b/paddle/fluid/memory/allocation/system_allocator.h
index 14fd7cc5fc8789..67376a3e39a224 100644
--- a/paddle/fluid/memory/allocation/system_allocator.h
+++ b/paddle/fluid/memory/allocation/system_allocator.h
@@ -17,7 +17,6 @@ limitations under the License. */
 #include <stddef.h>  // for size_t
 
 #include <string>
-#include "paddle/utils/test_macros.h"
 
 namespace paddle {
 namespace memory {
@@ -49,7 +48,7 @@ class GPUAllocator : public SystemAllocator {
  public:
   explicit GPUAllocator(int gpu_id) : gpu_id_(gpu_id) {}
 
-  TEST_API virtual void* Alloc(size_t* index, size_t size);
+  virtual void* Alloc(size_t* index, size_t size);
   virtual void Free(void* p, size_t size, size_t index);
   virtual bool UseGpu() const;
 
@@ -60,7 +59,7 @@ class GPUAllocator : public SystemAllocator {
 
 class CUDAPinnedAllocator : public SystemAllocator {
  public:
-  TEST_API virtual void* Alloc(size_t* index, size_t size);
+  virtual void* Alloc(size_t* index, size_t size);
   virtual void Free(void* p, size_t size, size_t index);
   virtual bool UseGpu() const;
 
diff --git a/paddle/phi/core/device_context.cc b/paddle/phi/core/device_context.cc
index 3804802e84260d..f4334642477971 100644
--- a/paddle/phi/core/device_context.cc
+++ b/paddle/phi/core/device_context.cc
@@ -415,7 +415,7 @@ T* DeviceContext::HostAlloc(TensorBase* tensor, size_t requested_size) const {
 }
 
 #define DEVICE_CONTEXT_MEMBER_FUNC_INSTANTIATION(dtype)              \
-  template dtype* DeviceContext::Alloc(                              \
+  template dtype* TEST_API DeviceContext::Alloc(                     \
       TensorBase* tensor, size_t requested_size, bool pinned) const; \
   template dtype* DeviceContext::HostAlloc(TensorBase* tensor,       \
                                            size_t requested_size) const;
diff --git a/paddle/phi/core/device_context.h b/paddle/phi/core/device_context.h
index e0d555fe01b3e0..06490eed8dc492 100644
--- a/paddle/phi/core/device_context.h
+++ b/paddle/phi/core/device_context.h
@@ -152,10 +152,9 @@ class PADDLE_API DeviceContext {
                       bool fake_alloc = false) const;
 
   template <typename T>
-  T* Alloc(TensorBase* tensor,
-           size_t requested_size = 0,
-           bool pinned = false) const;
-
+  TEST_API T* Alloc(TensorBase* tensor,
+                    size_t requested_size = 0,
+                    bool pinned = false) const;
   /**
    * @brief Allocate host memory for tensor.
    */

From dc99670c70f33bfb3c055a2d677877f4cb210fb8 Mon Sep 17 00:00:00 2001
From: ccsuzzh <1719571694@qq.com>
Date: Sun, 10 Mar 2024 14:06:54 +0800
Subject: [PATCH 10/20] return ci


From 42885681eb8a24eb81814fefbfc97e7780e9fa6a Mon Sep 17 00:00:00 2001
From: ccsuzzh <1719571694@qq.com>
Date: Mon, 11 Mar 2024 22:21:55 +0800
Subject: [PATCH 11/20] fix bug: dllexport DeviceContext::Alloc<T> only in Windows test builds

---
 paddle/phi/core/device_context.cc |  2 +-
 paddle/phi/core/device_context.h  | 13 ++++++++++---
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/paddle/phi/core/device_context.cc b/paddle/phi/core/device_context.cc
index f4334642477971..3804802e84260d 100644
--- a/paddle/phi/core/device_context.cc
+++ b/paddle/phi/core/device_context.cc
@@ -415,7 +415,7 @@ T* DeviceContext::HostAlloc(TensorBase* tensor, size_t requested_size) const {
 }
 
 #define DEVICE_CONTEXT_MEMBER_FUNC_INSTANTIATION(dtype)              \
-  template dtype* TEST_API DeviceContext::Alloc(                     \
+  template dtype* DeviceContext::Alloc(                              \
       TensorBase* tensor, size_t requested_size, bool pinned) const; \
   template dtype* DeviceContext::HostAlloc(TensorBase* tensor,       \
                                            size_t requested_size) const;
diff --git a/paddle/phi/core/device_context.h b/paddle/phi/core/device_context.h
index 7b2184d9ef60e4..a788498e0059e3 100644
--- a/paddle/phi/core/device_context.h
+++ b/paddle/phi/core/device_context.h
@@ -151,10 +151,17 @@ class PADDLE_API DeviceContext {
                       bool pinned = false,
                       bool fake_alloc = false) const;
 
+#if defined(_WIN32) && defined(PADDLE_WITH_TESTING)
   template <typename T>
-  TEST_API T* Alloc(TensorBase* tensor,
-                    size_t requested_size = 0,
-                    bool pinned = false) const;
+  __declspec(dllexport) T* Alloc(TensorBase* tensor,
+                                 size_t requested_size = 0,
+                                 bool pinned = false) const;
+#else
+  template <typename T>
+  T* Alloc(TensorBase* tensor,
+           size_t requested_size = 0,
+           bool pinned = false) const;
+#endif
   /**
    * @brief Allocate host memory for tensor.
    */

From 954c101042855c1b055e974ad8acddb6d529fa65 Mon Sep 17 00:00:00 2001
From: Galaxy1458 <55453380+Galaxy1458@users.noreply.github.com>
Date: Wed, 13 Mar 2024 11:41:18 +0800
Subject: [PATCH 12/20] Update device_context.h: use dllimport instead of dllexport for Alloc<T>

---
 paddle/phi/core/device_context.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/phi/core/device_context.h b/paddle/phi/core/device_context.h
index a788498e0059e3..8288b6c32a39bf 100644
--- a/paddle/phi/core/device_context.h
+++ b/paddle/phi/core/device_context.h
@@ -153,7 +153,7 @@ class PADDLE_API DeviceContext {
 
 #if defined(_WIN32) && defined(PADDLE_WITH_TESTING)
   template <typename T>
-  __declspec(dllexport) T* Alloc(TensorBase* tensor,
+  __declspec(dllimport) T* Alloc(TensorBase* tensor,
                                  size_t requested_size = 0,
                                  bool pinned = false) const;
 #else

From 84599993dbcb6810911eb800bb0c04060465dcc0 Mon Sep 17 00:00:00 2001
From: Galaxy1458 <55453380+Galaxy1458@users.noreply.github.com>
Date: Wed, 13 Mar 2024 13:31:10 +0800
Subject: [PATCH 13/20] Update device_context.h: drop const from the Windows Alloc<T> declaration

---
 paddle/phi/core/device_context.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/phi/core/device_context.h b/paddle/phi/core/device_context.h
index 8288b6c32a39bf..3702575167080c 100644
--- a/paddle/phi/core/device_context.h
+++ b/paddle/phi/core/device_context.h
@@ -155,7 +155,7 @@ class PADDLE_API DeviceContext {
   template <typename T>
   __declspec(dllimport) T* Alloc(TensorBase* tensor,
                                  size_t requested_size = 0,
-                                 bool pinned = false) const;
+                                 bool pinned = false) ;
 #else
   template <typename T>
   T* Alloc(TensorBase* tensor,

From 1eea8279f1c8241c2dd30c874fb1cd0111daedf6 Mon Sep 17 00:00:00 2001
From: ccsuzzh <1719571694@qq.com>
Date: Wed, 13 Mar 2024 22:13:40 +0800
Subject: [PATCH 14/20] fix bug: restore TEST_API on DeviceContext::Alloc<T> declaration and definition

---
 paddle/phi/core/device_context.cc |  6 +++---
 paddle/phi/core/device_context.h  | 14 ++++----------
 2 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/paddle/phi/core/device_context.cc b/paddle/phi/core/device_context.cc
index 3804802e84260d..2679dc3295d380 100644
--- a/paddle/phi/core/device_context.cc
+++ b/paddle/phi/core/device_context.cc
@@ -395,9 +395,9 @@ void* DeviceContext::Alloc(TensorBase* tensor,
 }
 
 template <typename T>
-T* DeviceContext::Alloc(TensorBase* tensor,
-                        size_t requested_size,
-                        bool pinned) const {
+TEST_API T* DeviceContext::Alloc(TensorBase* tensor,
+                                 size_t requested_size,
+                                 bool pinned) const {
   DataType dtype = phi::CppTypeToDataType<T>::Type();
   return static_cast<T*>(this->Alloc(tensor, dtype, requested_size, pinned));
 }
diff --git a/paddle/phi/core/device_context.h b/paddle/phi/core/device_context.h
index 3702575167080c..25d748c9150869 100644
--- a/paddle/phi/core/device_context.h
+++ b/paddle/phi/core/device_context.h
@@ -151,17 +151,11 @@ class PADDLE_API DeviceContext {
                       bool pinned = false,
                       bool fake_alloc = false) const;
 
-#if defined(_WIN32) && defined(PADDLE_WITH_TESTING)
   template <typename T>
-  __declspec(dllimport) T* Alloc(TensorBase* tensor,
-                                 size_t requested_size = 0,
-                                 bool pinned = false) ;
-#else
-  template <typename T>
-  T* Alloc(TensorBase* tensor,
-           size_t requested_size = 0,
-           bool pinned = false) const;
-#endif
+  TEST_API T* Alloc(TensorBase* tensor,
+                    size_t requested_size = 0,
+                    bool pinned = false) const;
+
   /**
    * @brief Allocate host memory for tensor.
    */

From ea830c4ef6d7c436067b55d6f25f8e44d9d67348 Mon Sep 17 00:00:00 2001
From: Galaxy1458 <55453380+Galaxy1458@users.noreply.github.com>
Date: Thu, 14 Mar 2024 16:14:03 +0800
Subject: [PATCH 15/20] Update device_context.cc: add TEST_API to the Alloc explicit instantiation

---
 paddle/phi/core/device_context.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/phi/core/device_context.cc b/paddle/phi/core/device_context.cc
index 2679dc3295d380..917657bf5bdbb4 100644
--- a/paddle/phi/core/device_context.cc
+++ b/paddle/phi/core/device_context.cc
@@ -415,7 +415,7 @@ T* DeviceContext::HostAlloc(TensorBase* tensor, size_t requested_size) const {
 }
 
 #define DEVICE_CONTEXT_MEMBER_FUNC_INSTANTIATION(dtype)              \
-  template dtype* DeviceContext::Alloc(                              \
+  template TEST_API dtype* DeviceContext::Alloc(                              \
       TensorBase* tensor, size_t requested_size, bool pinned) const; \
   template dtype* DeviceContext::HostAlloc(TensorBase* tensor,       \
                                            size_t requested_size) const;

From 525a875777d5310d24038d3ce5abc9487510c814 Mon Sep 17 00:00:00 2001
From: Galaxy1458 <55453380+Galaxy1458@users.noreply.github.com>
Date: Thu, 14 Mar 2024 16:14:40 +0800
Subject: [PATCH 16/20] Update device_context.cc: remove TEST_API from the Alloc<T> definition

---
 paddle/phi/core/device_context.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/phi/core/device_context.cc b/paddle/phi/core/device_context.cc
index 917657bf5bdbb4..62936487f56039 100644
--- a/paddle/phi/core/device_context.cc
+++ b/paddle/phi/core/device_context.cc
@@ -395,7 +395,7 @@ void* DeviceContext::Alloc(TensorBase* tensor,
 }
 
 template <typename T>
-TEST_API T* DeviceContext::Alloc(TensorBase* tensor,
+T* DeviceContext::Alloc(TensorBase* tensor,
                                  size_t requested_size,
                                  bool pinned) const {
   DataType dtype = phi::CppTypeToDataType<T>::Type();

From ff679075f532d640b82e4286575e40fe0d205814 Mon Sep 17 00:00:00 2001
From: Galaxy1458 <55453380+Galaxy1458@users.noreply.github.com>
Date: Thu, 14 Mar 2024 17:59:57 +0800
Subject: [PATCH 17/20] Update cudnn_bn_add_relu_test.cc: restore USE_OP_ITSELF/PD_DECLARE_KERNEL declarations

---
 test/cpp/fluid/fused/cudnn_bn_add_relu_test.cc | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/test/cpp/fluid/fused/cudnn_bn_add_relu_test.cc b/test/cpp/fluid/fused/cudnn_bn_add_relu_test.cc
index 010ca490049d3c..cad204415174b1 100644
--- a/test/cpp/fluid/fused/cudnn_bn_add_relu_test.cc
+++ b/test/cpp/fluid/fused/cudnn_bn_add_relu_test.cc
@@ -33,6 +33,13 @@ namespace framework = paddle::framework;
 namespace platform = paddle::platform;
 namespace op = paddle::operators;
 
+USE_OP_ITSELF(batch_norm);
+USE_OP_ITSELF(fused_bn_add_activation);
+USE_OP_ITSELF(fused_bn_add_activation_grad);
+PD_DECLARE_KERNEL(batch_norm, GPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(fused_bn_add_activation, GPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(fused_bn_add_activation_grad, GPU, ALL_LAYOUT);
+
 template <typename T>
 void InitRandomTensor(const std::vector<int64_t> &dims,
                       phi::DenseTensor *cpu_out) {

From ca287a649606d99bbf72662c9d4e81f74816445b Mon Sep 17 00:00:00 2001
From: Galaxy1458 <55453380+Galaxy1458@users.noreply.github.com>
Date: Thu, 14 Mar 2024 18:01:14 +0800
Subject: [PATCH 18/20] Update CMakeLists.txt: switch test_cudnn_norm_conv from paddle_test to nv_test with DEPS

---
 test/cpp/fluid/fused/CMakeLists.txt | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/test/cpp/fluid/fused/CMakeLists.txt b/test/cpp/fluid/fused/CMakeLists.txt
index 324987ede660d0..35e8d06288eb78 100644
--- a/test/cpp/fluid/fused/CMakeLists.txt
+++ b/test/cpp/fluid/fused/CMakeLists.txt
@@ -39,7 +39,16 @@ if(WITH_GPU OR WITH_ROCM)
   endif()
   # resnet_unit needs cudnn 8.0 above
   if((NOT WITH_ROCM) AND (NOT ${CUDNN_VERSION} VERSION_LESS 8000))
-    paddle_test(test_cudnn_norm_conv SRCS cudnn_norm_conv_test.cc)
+    nv_test(
+      test_cudnn_norm_conv
+      SRCS cudnn_norm_conv_test.cc
+      DEPS generated_op
+           depthwise_conv
+           tensor
+           op_registry
+           device_context
+           phi
+           common)
     paddle_test(test_cudnn_bn_add_relu SRCS cudnn_bn_add_relu_test.cc)
   endif()
 endif()

From 7f69e1eeaf8ed065f08ab673df8c61d70c9978d7 Mon Sep 17 00:00:00 2001
From: Galaxy1458 <55453380+Galaxy1458@users.noreply.github.com>
Date: Thu, 14 Mar 2024 19:04:23 +0800
Subject: [PATCH 19/20] Update cudnn_bn_add_relu_test.cc: remove USE_OP_ITSELF/PD_DECLARE_KERNEL declarations again

---
 test/cpp/fluid/fused/cudnn_bn_add_relu_test.cc | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/test/cpp/fluid/fused/cudnn_bn_add_relu_test.cc b/test/cpp/fluid/fused/cudnn_bn_add_relu_test.cc
index cad204415174b1..010ca490049d3c 100644
--- a/test/cpp/fluid/fused/cudnn_bn_add_relu_test.cc
+++ b/test/cpp/fluid/fused/cudnn_bn_add_relu_test.cc
@@ -33,13 +33,6 @@ namespace framework = paddle::framework;
 namespace platform = paddle::platform;
 namespace op = paddle::operators;
 
-USE_OP_ITSELF(batch_norm);
-USE_OP_ITSELF(fused_bn_add_activation);
-USE_OP_ITSELF(fused_bn_add_activation_grad);
-PD_DECLARE_KERNEL(batch_norm, GPU, ALL_LAYOUT);
-PD_DECLARE_KERNEL(fused_bn_add_activation, GPU, ALL_LAYOUT);
-PD_DECLARE_KERNEL(fused_bn_add_activation_grad, GPU, ALL_LAYOUT);
-
 template <typename T>
 void InitRandomTensor(const std::vector<int64_t> &dims,
                       phi::DenseTensor *cpu_out) {

From 200623a8cc54c1654cdcdc74770c7ba6c41ba355 Mon Sep 17 00:00:00 2001
From: ccsuzzh <1719571694@qq.com>
Date: Fri, 15 Mar 2024 17:28:48 +0800
Subject: [PATCH 20/20] fix codestyle

---
 paddle/phi/core/device_context.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/paddle/phi/core/device_context.cc b/paddle/phi/core/device_context.cc
index 62936487f56039..6169681885b7be 100644
--- a/paddle/phi/core/device_context.cc
+++ b/paddle/phi/core/device_context.cc
@@ -396,8 +396,8 @@ void* DeviceContext::Alloc(TensorBase* tensor,
 
 template <typename T>
 T* DeviceContext::Alloc(TensorBase* tensor,
-                                 size_t requested_size,
-                                 bool pinned) const {
+                        size_t requested_size,
+                        bool pinned) const {
   DataType dtype = phi::CppTypeToDataType<T>::Type();
   return static_cast<T*>(this->Alloc(tensor, dtype, requested_size, pinned));
 }
@@ -415,7 +415,7 @@ T* DeviceContext::HostAlloc(TensorBase* tensor, size_t requested_size) const {
 }
 
 #define DEVICE_CONTEXT_MEMBER_FUNC_INSTANTIATION(dtype)              \
-  template TEST_API dtype* DeviceContext::Alloc(                              \
+  template TEST_API dtype* DeviceContext::Alloc(                     \
       TensorBase* tensor, size_t requested_size, bool pinned) const; \
   template dtype* DeviceContext::HostAlloc(TensorBase* tensor,       \
                                            size_t requested_size) const;