[metax] modify fused_bias_dropout_residual_layer_norm (#34)

jxwangmetax · web-flow · commit b22fc1317d78 · 2025-09-17T17:58:21.000+08:00
* modify cmake for warpctc and warprnnt

* modify conv for tf32 and fp32

* modify conv kernel

* modify library to static library

* modify kernel

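* modify fused_bias_dropout_residual_layer_norm: patch GetDesiredBlockDim in layer_norm_impl.cu.h (kMaxBlockDim 512 -> 256, non-HIP lwarpSize 32 -> 64)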
diff --git a/backends/metax_gpu/patch/paddle.patch b/backends/metax_gpu/patch/paddle.patch
@@ -470,6 +470,25 @@ index 88663ec880..98b93072a3 100644
  #include "paddle/phi/kernels/funcs/detail/gru_gpu_kernel.h"
  #include "paddle/phi/kernels/funcs/detail/gru_kernel.h"
  
+diff --git a/paddle/phi/kernels/funcs/layer_norm_impl.cu.h b/paddle/phi/kernels/funcs/layer_norm_impl.cu.h
+index 4eae698648..5c047723ea 100644
+--- a/paddle/phi/kernels/funcs/layer_norm_impl.cu.h
++++ b/paddle/phi/kernels/funcs/layer_norm_impl.cu.h
+@@ -43,11 +43,11 @@ template <typename T>
+ using LayerNormParamType = typename CudnnDataType<T>::BatchNormParamType;
+ 
+ inline static int GetDesiredBlockDim(int64_t block_dim) {
+-  const int kMaxBlockDim = 512;
++  const int kMaxBlockDim = 256;
+ #ifdef __HIPCC__
+   const int lwarpSize = 64;
+ #else
+-  const int lwarpSize = 32;
++  const int lwarpSize = 64;
+ #endif
+   return block_dim >= kMaxBlockDim ? kMaxBlockDim : lwarpSize;
+ }
+
 diff --git a/paddle/phi/kernels/funcs/math/context_project.h b/paddle/phi/kernels/funcs/math/context_project.h
 index 15e1a4a3c3..e4780538d7 100644
 --- a/paddle/phi/kernels/funcs/math/context_project.h