diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 649f24cfd53..5d3832a35d7 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -47,6 +47,10 @@ jobs:
     uses: ./.github/workflows/_IXUCA.yml
     needs: [Codestyle-Check]
 
+  metax:
+    name: metax
+    uses: ./.github/workflows/_Metax-X86.yml
+    needs: [Codestyle-Check]
   #sdaa:
     #name: sdaa
     #uses: ./.github/workflows/_SDAA.yml
diff --git a/.github/workflows/_Metax-X86.yaml b/.github/workflows/_Metax-X86.yaml
deleted file mode 100644
index 486236955ad..00000000000
--- a/.github/workflows/_Metax-X86.yaml
+++ /dev/null
@@ -1,96 +0,0 @@
-name: paddle metax gpu test
-
-on:
-  workflow_dispatch:
-  pull_request:
-    types: [opened, synchronize]
-    branches: [develop, release/**]
-permissions: read-all
-
-defaults:
-  run:
-    shell: bash
-
-jobs:
-  metax-gpu-test:
-    runs-on: paddle-metax-runner-set
-    # runs-on: debug-paddle-runner-set
-    steps:
-      - name: Checkout repository
-        run: |
-          git config --global user.name "GitHub Actions"
-          git config --global user.email "actions@github.com"
-
-          git clone \
-            --reference-if-able /home/runner/PaddleCustomDevice \
-            --depth=1 \
-            --shallow-submodules \
-            --jobs=8 \
-            --branch ${{ github.base_ref || github.ref_name}} \
-            --recurse-submodules \
-            https://${{ github.actor }}:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}.git .
-
-          if [ "${{ github.event_name }}" == "pull_request" ]; then
-            git fetch origin pull/${{ github.event.pull_request.number }}/head:pull/${{ github.event.pull_request.number }}/head
-            git checkout pull/${{ github.event.pull_request.number }}/head
-
-
-
-
-            paddle_branch=${{ github.base_ref || github.ref_name}}
-            echo $paddle_branch
-            # sleep 10000
-            change_numbers=$(git diff --name-only remotes/origin/${paddle_branch} | wc -l)
-            echo $change_numbers
-
-
-            change_backend=$(git diff --name-only remotes/origin/${paddle_branch} | grep -c "backends/" || true)
-            echo $change_backend
-            change_metax_only=$(git diff --name-only remotes/origin/${paddle_branch} | grep -c "backends/metax_gpu" || true)
-            echo $change_metax_only
-
-            # change_backend=$(git diff --name-only remotes/origin/${paddle_branch} | grep "backends/"| wc -l)
-            # echo $change_backend
-            # change_metax_only=$(git diff --name-only remotes/origin/${paddle_branch} | grep "backends/metax_gpu"| wc -l)
-            # echo $change_metax_only
-
-            git diff --name-only remotes/origin/${paddle_branch}
-
-            if [ $change_numbers -ne $change_backend ]; then
-              echo "Common file changed, continue to run metax FULL CI test ..."
-            elif [ $paddle_branch -eq 0 ] ; then
-              echo "NO metax backend changes found, skip metax FULL CI ....."
-              exit 0
-            fi
-
-
-            # git submodule update --init --recursive
-          fi
-
-
-      - name: compile
-        run: |
-          # sleep 10000
-          cd backends/metax_gpu
-          bash build.sh
-
-      - name: run test
-
-        run: |
-          cd backends/metax_gpu/tests
-          bash run_test.sh -j 8
-
-      - name: push whl
-        env:
-          PR_ID: ${{ github.event.pull_request.number }}
-          COMMIT_ID: ${{ github.event.pull_request.head.sha }}
-        run: |
-          pip install bce-python-sdk==0.8.74
-          export AK=paddle
-          export SK=paddle
-          if [ ! -f "BosClient.py}" ]; then
-            wget -q --no-proxy https://xly-devops.bj.bcebos.com/home/bos_retry.tar.gz --no-check-certificate
-            tar xf bos_retry.tar.gz
-          fi
-          cp backends/metax_gpu/build/dist/paddle_metax_gpu*.whl .
-          python BosClient.py paddle_metax_gpu*.whl paddle-github-action/PaddleCustomDevice/metax_gpu/${PR_ID}/${COMMIT_ID}
diff --git a/.github/workflows/_Metax-X86.yml b/.github/workflows/_Metax-X86.yml
new file mode 100644
index 00000000000..2ff64cae36a
--- /dev/null
+++ b/.github/workflows/_Metax-X86.yml
@@ -0,0 +1,111 @@
+# Reusable workflow: builds and tests the MetaX GPU backend for a pull request,
+# then uploads the built wheel. Invoked through workflow_call.
+name: PR-CI-METAX
+
+
+on:
+  workflow_call:
+    inputs:
+      workflow-name:
+        type: string
+        required: false
+      clone_dir:
+        type: string
+        required: false
+        default: 'PaddlecustomDevice'
+      is_pr:
+        type: string
+        required: false
+        default: 'true'
+
+
+defaults:
+  run:
+    shell: bash
+
+
+jobs:
+  metax-gpu-test:
+    runs-on: paddle-metax-runner-set
+    env:
+      PR_ID: ${{ github.event.pull_request.number }}
+      COMMIT_ID: ${{ github.event.pull_request.head.sha }}
+      BRANCH: develop
+
+
+    steps:
+      # Fetch the pre-packaged PR source tarball and merge BRANCH into it.
+      - name: Checkout repository
+        run: |
+          set -x
+          wget -q --tries=5 --no-proxy https://paddle-github-action.bj.bcebos.com/PaddleCustomDevice/PR/${PR_ID}/${COMMIT_ID}/PaddleCustomDevice.tar.gz --no-check-certificate
+          echo "Extracting PaddleCustomDevice.tar.gz"
+          tar -xf PaddleCustomDevice.tar.gz
+          cd PaddleCustomDevice
+          git config --global --add safe.directory "*"
+          git remote add upstream https://github.com/PaddlePaddle/PaddleCustomDevice.git
+          git merge ${BRANCH} --no-edit
+          git --no-pager log --pretty=oneline -5
+
+      - name: Check bypass
+        id: check-bypass
+        uses: ./PaddleCustomDevice/.github/actions/check-bypass
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          workflow-name: metax
+
+
+      # Publishes should_skip=true when every changed path matches backends/
+      # and none matches backends/metax_gpu; the later steps are gated on it.
+      - name: RUN METAX-GPU
+        id: run-metax
+        if: steps.check-bypass.outputs.can-skip != 'true'
+        run: |
+          cd PaddleCustomDevice
+          # !!!!! SKIP IF NO METAX CHANGE !!!!
+          echo "=========== Checking PR Changes If METAX FULL CI Needed ==========="
+
+          change_numbers=$(git diff --name-only remotes/origin/${BRANCH} | wc -l)
+
+          # grep exits 1 when nothing matches, which would abort this step under
+          # the "bash -eo pipefail" shell; "grep -c ... || true" still prints 0.
+          change_backend=$(git diff --name-only remotes/origin/${BRANCH} | grep -c "backends/" || true)
+          change_metax_only=$(git diff --name-only remotes/origin/${BRANCH} | grep -c "backends/metax_gpu" || true)
+          git --no-pager diff --name-only remotes/origin/${BRANCH}
+          if [ "$change_numbers" -ne "$change_backend" ]; then
+            echo "Common file changed, continue to run METAX FULL CI test ..."
+            echo "should_skip=false" >> "$GITHUB_OUTPUT"
+          elif [ "$change_metax_only" -eq 0 ]; then
+            echo "NO METAX backend changes found, skip METAX FULL CI ...."
+            echo "should_skip=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "should_skip=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      # The remaining steps run only when neither the bypass check nor the
+      # change detection above asked to skip the full CI.
+      - name: compile
+        if: steps.check-bypass.outputs.can-skip != 'true' && steps.run-metax.outputs.should_skip != 'true'
+        run: |
+          cd PaddleCustomDevice/backends/metax_gpu
+          bash build.sh
+
+      - name: run test
+        if: steps.check-bypass.outputs.can-skip != 'true' && steps.run-metax.outputs.should_skip != 'true'
+        run: |
+          cd PaddleCustomDevice/backends/metax_gpu/tests
+          bash run_test.sh -j 8
+
+      # PR_ID and COMMIT_ID come from the job-level env.
+      - name: push whl
+        if: steps.check-bypass.outputs.can-skip != 'true' && steps.run-metax.outputs.should_skip != 'true'
+        run: |
+          pip install bce-python-sdk==0.8.74
+          export AK=paddle
+          export SK=paddle
+          if [ ! -f "BosClient.py" ]; then
+            wget -q --no-proxy https://xly-devops.bj.bcebos.com/home/bos_retry.tar.gz --no-check-certificate
+            tar xf bos_retry.tar.gz
+          fi
+          cp PaddleCustomDevice/backends/metax_gpu/build/dist/paddle_metax_gpu*.whl .
+          python BosClient.py paddle_metax_gpu*.whl paddle-github-action/PaddleCustomDevice/metax_gpu/${PR_ID}/${COMMIT_ID}
diff --git a/Paddle b/Paddle
index db736a01176..43f16a629f5 160000
--- a/Paddle
+++ b/Paddle
@@ -1 +1 @@
-Subproject commit db736a011768c9b112723d60726f0b14d2c5e4e2
+Subproject commit 43f16a629f5b4653fa879ba2635c32262f37331e
diff --git a/backends/metax_gpu/patch/paddle.patch b/backends/metax_gpu/patch/paddle.patch
index c9390e0c4d7..bc74c4e44da 100755
--- a/backends/metax_gpu/patch/paddle.patch
+++ b/backends/metax_gpu/patch/paddle.patch
@@ -19,10 +19,10 @@ index cfada544d4..a690e97d74 100644
  
  set(EIGEN_INCLUDE_DIR ${SOURCE_DIR})
 diff --git a/paddle/fluid/operators/fused/CMakeLists.txt b/paddle/fluid/operators/fused/CMakeLists.txt
-index 99a0116d92..2566e7c41a 100755
+index 8d445b39ae..504e7b6293 100755
 --- a/paddle/fluid/operators/fused/CMakeLists.txt
 +++ b/paddle/fluid/operators/fused/CMakeLists.txt
-@@ -43,6 +43,11 @@ if(WITH_GPU OR WITH_ROCM)
+@@ -39,6 +39,11 @@ if(WITH_GPU OR WITH_ROCM)
      op_library(fused_multi_transformer_int8_op)
    endif()
  
@@ -34,19 +34,6 @@ index 99a0116d92..2566e7c41a 100755
    if(CUDA_VERSION GREATER_EQUAL 11.6)
      op_library(fused_gemm_epilogue_op)
    endif()
-diff --git a/paddle/fluid/platform/profiler/cupti_data_process.cc b/paddle/fluid/platform/profiler/cupti_data_process.cc
-index bff0f2bf70..9376b5781f 100644
---- a/paddle/fluid/platform/profiler/cupti_data_process.cc
-+++ b/paddle/fluid/platform/profiler/cupti_data_process.cc
-@@ -16,7 +16,7 @@
- 
- #include <cstdio>
- 
--#include "paddle/fluid/platform/enforce.h"
-+// #include "paddle/fluid/platform/enforce.h"
- #include "paddle/phi/core/os_info.h"
- #include "paddle/phi/core/platform/device/gpu/gpu_info.h"
- #include "paddle/phi/core/platform/profiler/utils.h"
 diff --git a/paddle/phi/backends/dynload/cublas.h b/paddle/phi/backends/dynload/cublas.h
 index bda9cbe17e..c73eba9c8a 100644
 --- a/paddle/phi/backends/dynload/cublas.h
@@ -98,7 +85,7 @@ index 8b2e08c777..ca926df151 100644
  #define CUBLASLT_BLAS_ROUTINE_EACH(__macro)      \
    __macro(cublasLtCreate);                       \
 diff --git a/paddle/phi/backends/dynload/cudnn.h b/paddle/phi/backends/dynload/cudnn.h
-index a943bbed9a..af931490e3 100644
+index ad2ada9dfa..9e8389e7dc 100644
 --- a/paddle/phi/backends/dynload/cudnn.h
 +++ b/paddle/phi/backends/dynload/cudnn.h
 @@ -38,7 +38,10 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
@@ -134,7 +121,7 @@ index 1547909d92..ef20838434 100644
      }                                                                \
    };                                                                 \
 diff --git a/paddle/phi/backends/dynload/cupti.h b/paddle/phi/backends/dynload/cupti.h
-index 59e92955c9..d2f8c2da15 100644
+index 4241a512e8..94e32b743e 100644
 --- a/paddle/phi/backends/dynload/cupti.h
 +++ b/paddle/phi/backends/dynload/cupti.h
 @@ -24,8 +24,8 @@ limitations under the License. */
@@ -148,7 +135,7 @@ index 59e92955c9..d2f8c2da15 100644
  
  extern std::once_flag cupti_dso_flag;
  extern void *cupti_dso_handle;
-@@ -71,7 +71,7 @@ extern void *cupti_dso_handle;
+@@ -105,7 +105,7 @@ inline bool IsXPUTracingEnabled() {
  CUPTI_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUPTI_WRAP);
  
  #undef DECLARE_DYNAMIC_LOAD_CUPTI_WRAP
@@ -191,7 +178,7 @@ index e8cb0ac643..e8e7596d44 100644
      }                                                                \
    };                                                                 \
 diff --git a/paddle/phi/backends/dynload/dynamic_loader.cc b/paddle/phi/backends/dynload/dynamic_loader.cc
-index c74ae9592e..f6dc68917c 100644
+index 39f50bd95d..4d627b99b7 100644
 --- a/paddle/phi/backends/dynload/dynamic_loader.cc
 +++ b/paddle/phi/backends/dynload/dynamic_loader.cc
 @@ -18,7 +18,6 @@ limitations under the License. */
@@ -229,7 +216,7 @@ index c5309e7e11..3328571380 100644
      }                                                              \
    };                                                               \
 diff --git a/paddle/phi/backends/gpu/cuda/cuda_device_function.h b/paddle/phi/backends/gpu/cuda/cuda_device_function.h
-index 092365a961..23d3b65dc6 100644
+index 092365a961..8bd3f9fcea 100644
 --- a/paddle/phi/backends/gpu/cuda/cuda_device_function.h
 +++ b/paddle/phi/backends/gpu/cuda/cuda_device_function.h
 @@ -1,3 +1,4 @@
@@ -347,7 +334,22 @@ index 092365a961..23d3b65dc6 100644
    CREATE_SHFL_MASK(mask, tid < len);
  
    for (int offset = warpSize / 2; offset > 0; offset /= 2)
-
+diff --git a/paddle/phi/common/float16.h b/paddle/phi/common/float16.h
+index d970878dc2..fe0382ccad 100644
+--- a/paddle/phi/common/float16.h
++++ b/paddle/phi/common/float16.h
+@@ -105,8 +105,9 @@ struct PADDLE_ALIGN(2) float16 {
+ #endif
+ 
+   HOSTDEVICE inline explicit float16(float val) {
+-#if defined(PADDLE_CUDA_FP16) && \
+-    (defined(__HIPCC__) || (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300))
++// #if defined(PADDLE_CUDA_FP16) && \
++//     (defined(__HIPCC__) || (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300))
++#if 1
+     half tmp = __float2half(val);
+     x = *reinterpret_cast<uint16_t*>(&tmp);
+ 
 diff --git a/paddle/phi/core/enforce.h b/paddle/phi/core/enforce.h
 index 024a7de73e..66b373d698 100644
 --- a/paddle/phi/core/enforce.h
@@ -651,7 +653,7 @@ index 461e6e2474..48a64ae9ce 100644
    dim3 threads(kWarpSize, kBlockDimY);
    dim3 grids(static_cast<int>((D + kWarpSize - 1) / kWarpSize));
 diff --git a/paddle/phi/kernels/funcs/layer_norm_impl.cu.h b/paddle/phi/kernels/funcs/layer_norm_impl.cu.h
-index 4eae698648..5c047723ea 100644
+index 470b0d33ee..d58838d53c 100644
 --- a/paddle/phi/kernels/funcs/layer_norm_impl.cu.h
 +++ b/paddle/phi/kernels/funcs/layer_norm_impl.cu.h
 @@ -43,11 +43,11 @@ template <typename T>
@@ -995,7 +997,7 @@ index 9d4bb18d55..80405c2b78 100644
          }
        }
 diff --git a/paddle/phi/kernels/fusion/gpu/masked_multihead_attention_kernel.cu b/paddle/phi/kernels/fusion/gpu/masked_multihead_attention_kernel.cu
-index acb3b83bc9..264d2a2b3e 100644
+index 6cf08a5ac7..c09018ba78 100644
 --- a/paddle/phi/kernels/fusion/gpu/masked_multihead_attention_kernel.cu
 +++ b/paddle/phi/kernels/fusion/gpu/masked_multihead_attention_kernel.cu
 @@ -15,7 +15,7 @@
@@ -1008,7 +1010,7 @@ index acb3b83bc9..264d2a2b3e 100644
  namespace phi {
  namespace fusion {
 diff --git a/paddle/phi/kernels/fusion/gpu/qkv_unpack_mha_kernel.cu b/paddle/phi/kernels/fusion/gpu/qkv_unpack_mha_kernel.cu
-index b2d15a59f8..f64582e85a 100644
+index 1e7869afec..26ac439fc7 100644
 --- a/paddle/phi/kernels/fusion/gpu/qkv_unpack_mha_kernel.cu
 +++ b/paddle/phi/kernels/fusion/gpu/qkv_unpack_mha_kernel.cu
 @@ -15,7 +15,7 @@
@@ -1021,7 +1023,7 @@ index b2d15a59f8..f64582e85a 100644
  namespace phi {
  namespace fusion {
 diff --git a/paddle/phi/kernels/gpu/depthwise_conv.h b/paddle/phi/kernels/gpu/depthwise_conv.h
-index 2edac5eba5..4f265e3db7 100644
+index 770a3e1296..b0ec1b949b 100644
 --- a/paddle/phi/kernels/gpu/depthwise_conv.h
 +++ b/paddle/phi/kernels/gpu/depthwise_conv.h
 @@ -29,8 +29,8 @@ namespace cub = hipcub;
diff --git a/backends/metax_gpu/tests/ignore.txt b/backends/metax_gpu/tests/ignore.txt
index 215280b8cb8..b1391194d7f 100644
--- a/backends/metax_gpu/tests/ignore.txt
+++ b/backends/metax_gpu/tests/ignore.txt
@@ -12,3 +12,4 @@ test_conv3d_transpose_op
 test_conv3d_layer
 test_conv3d_transpose_part2_op
 test_fused_conv2d_add_act_op
+test_bilinear_interp_v2_op