[AutoParallel] Add take_along_axis spmd rules (#72063)

NKNaN · web-flow · commit ecd685afb0ff · 2025-06-30T11:25:57.000+08:00
* add take_along_axis spmd rule

* update cmakelists

* update rule

* update rules and test

* update rules and test

* fix test
diff --git a/paddle/phi/infermeta/spmd_rules/rules.cc b/paddle/phi/infermeta/spmd_rules/rules.cc
@@ -805,4 +805,9 @@ PD_REGISTER_SPMD_RULE(
     fused_gemm_epilogue,
     PD_INFER_SPMD(phi::distributed::FusedGemmEpilogueInferSpmdBase));
 
+// take_along_axis: forward rule followed by the grad rule
+PD_REGISTER_SPMD_RULE(
+    take_along_axis,
+    PD_INFER_SPMD(phi::distributed::TakeAlongAxisInferSpmd),
+    PD_INFER_SPMD(phi::distributed::TakeAlongAxisGradInferSpmd));
 }  // namespace phi::distributed
diff --git a/paddle/phi/infermeta/spmd_rules/rules.h b/paddle/phi/infermeta/spmd_rules/rules.h
@@ -80,6 +80,7 @@ limitations under the License. */
 #include "paddle/phi/infermeta/spmd_rules/squared_l2_norm.h"
 #include "paddle/phi/infermeta/spmd_rules/squeeze.h"
 #include "paddle/phi/infermeta/spmd_rules/stack.h"
+#include "paddle/phi/infermeta/spmd_rules/take_along_axis.h"
 #include "paddle/phi/infermeta/spmd_rules/tile.h"
 #include "paddle/phi/infermeta/spmd_rules/topk.h"
 #include "paddle/phi/infermeta/spmd_rules/transpose.h"
diff --git a/paddle/phi/infermeta/spmd_rules/take_along_axis.cc b/paddle/phi/infermeta/spmd_rules/take_along_axis.cc
@@ -0,0 +1,160 @@
+/* Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/phi/infermeta/spmd_rules/take_along_axis.h"
+
+#include "glog/logging.h"
+
+#include "paddle/phi/infermeta/spmd_rules/spmd_rule_macro_define.h"
+#include "paddle/phi/infermeta/spmd_rules/utils.h"
+
+namespace phi::distributed {
+// Infers the dist attrs of take_along_axis, which for a 3-D input computes
+//   out[i][j][k] = x[index[i][j][k]][j][k]  # axis == 0
+//   out[i][j][k] = x[i][index[i][j][k]][k]  # axis == 1
+//   out[i][j][k] = x[i][j][index[i][j][k]]  # axis == 2
+//
+// Deduced spmd rule:
+//   x:     cannot be sharded on the `axis` dim;
+//   index: the `axis` dim may be sharded or not, every other dim follows x;
+//   out:   same as index;
+//   a non-`axis` dim whose size differs between x and index is not sharded.
+//
+// `axis` may be negative (counted from the last dim) and must lie in
+// [-x_ndim, x_ndim). Fails with InvalidArgument if x and index differ in rank
+// or `axis` is out of range. Returns {{x_dst, index_dst}, {out}}.
+SpmdInfo TakeAlongAxisInferSpmd(const DistMetaTensor& x,
+                                const DistMetaTensor& index,
+                                int axis) {
+  EXTRACT_SHAPE_AND_DIST_ATTR(x);
+  EXTRACT_SHAPE_AND_DIST_ATTR(index);
+  PADDLE_ENFORCE_EQ(x_ndim,
+                    index_ndim,
+                    common::errors::InvalidArgument(
+                        "x and index must have the same number of dimensions "
+                        "but received x_ndim [%d], index_ndim [%d]",
+                        x_ndim,
+                        index_ndim));
+  // `axis` is used below as a position in the notation strings and in the
+  // dims mapping, so reject out-of-range values before touching either.
+  PADDLE_ENFORCE_EQ(
+      axis >= -x_ndim && axis < x_ndim,
+      true,
+      common::errors::InvalidArgument(
+          "axis must be in range [-%d, %d), but received axis [%d]",
+          x_ndim,
+          x_ndim,
+          axis));
+  if (axis < 0) axis += x_ndim;
+
+  // Step1: Build Einsum Notation
+  // e.g. axis=1, x: a1c, index: abc, out: abc
+  std::string alphabet = "abcdefghijklmnopqrstuvwxyz";
+  std::string index_axes = GetBroadcastAxes(index_ndim, index_ndim, alphabet);
+  std::string x_axes = index_axes;
+  x_axes.replace(axis, 1, "1");
+  for (int i = 0; i < index_ndim; ++i) {
+    // Sizes differ on a non-`axis` dim: mark it "1" in both notations.
+    if (i != axis && x_shape[i] != index_shape[i]) {
+      x_axes.replace(i, 1, "1");
+      index_axes.replace(i, 1, "1");
+    }
+  }
+  std::string out_axes = index_axes;
+
+  // Step2: Sharding Propagation
+  // Step2.1: Merge input shardings
+  std::vector<int64_t> x_dims_mapping(x_dims_mapping_src);
+  x_dims_mapping[axis] = -1;  // x must not be sharded on `axis`
+  std::vector<int64_t> index_dims_mapping(index_dims_mapping_src);
+  std::unordered_map<std::string, int64_t> axis_to_dim_map =
+      ShardingMergeForTensors(
+          {{x_axes, x_dims_mapping}, {index_axes, index_dims_mapping}});
+
+  // Step2.2: Infer output dims mapping
+  TensorDistAttr x_dist_attr_dst = CopyTensorDistAttrForOutput(x_dist_attr_src);
+  x_dist_attr_dst.set_dims_mapping(
+      GetDimsMappingForAxes(x_axes, axis_to_dim_map));
+
+  TensorDistAttr index_dist_attr_dst =
+      CopyTensorDistAttrForOutput(index_dist_attr_src);
+  index_dist_attr_dst.set_dims_mapping(
+      GetDimsMappingForAxes(index_axes, axis_to_dim_map));
+
+  TensorDistAttr out_dist_attr =
+      CopyTensorDistAttrForOutput(index_dist_attr_src);
+  out_dist_attr.set_dims_mapping(
+      GetDimsMappingForAxes(out_axes, axis_to_dim_map));
+
+  VLOG(4) << "x_axes: " << x_axes << " index_axes: " << index_axes
+          << " out_axes: " << out_axes;
+  LOG_SPMD_INPUT(x);
+  LOG_SPMD_INPUT(index);
+  VLOG(4) << "out";
+  VLOG(4) << "dist_attr: [" << out_dist_attr.to_string() << "]";
+  return {{x_dist_attr_dst, index_dist_attr_dst}, {out_dist_attr}};
+}
+
+// Infers the dist attrs of take_along_axis_grad from the sharding of out_grad.
+//
+// index and out_grad both take out_grad's dims mapping, except that a
+// non-`axis` dim whose size differs between x and index is marked "1" (not
+// sharded). x and x_grad use the same notation with the `axis` dim marked "1"
+// as well. The incoming dims mappings of x and index are not consulted.
+//
+// `axis` may be negative (counted from the last dim) and must lie in
+// [-x_ndim, x_ndim). Fails with InvalidArgument if the three inputs differ in
+// rank or `axis` is out of range.
+// Returns {{x_dst, index_dst, out_grad_dst}, {x_grad}}.
+//
+// NOTE(review): when out_grad is sharded on `axis`, each rank appears to see
+// only part of the contributions to x_grad, yet x_grad is not marked partial
+// here -- confirm this is handled elsewhere.
+SpmdInfo TakeAlongAxisGradInferSpmd(const DistMetaTensor& x,
+                                    const DistMetaTensor& index,
+                                    const DistMetaTensor& out_grad,
+                                    int axis) {
+  EXTRACT_SHAPE_AND_DIST_ATTR(x);
+  EXTRACT_SHAPE_AND_DIST_ATTR(index);
+  EXTRACT_SHAPE_AND_DIST_ATTR(out_grad);
+  // The loop below walks x_shape, index_shape and the out_grad notation with
+  // one counter, so all three ranks must agree.
+  PADDLE_ENFORCE_EQ(
+      x_ndim == index_ndim && index_ndim == out_grad_ndim,
+      true,
+      common::errors::InvalidArgument(
+          "x, index and out_grad must have the same number of dimensions "
+          "but received x_ndim [%d], index_ndim [%d], out_grad_ndim [%d]",
+          x_ndim,
+          index_ndim,
+          out_grad_ndim));
+  // `axis` is used below as a position in the notation strings, so reject
+  // out-of-range values before using it.
+  PADDLE_ENFORCE_EQ(
+      axis >= -x_ndim && axis < x_ndim,
+      true,
+      common::errors::InvalidArgument(
+          "axis must be in range [-%d, %d), but received axis [%d]",
+          x_ndim,
+          x_ndim,
+          axis));
+  if (axis < 0) axis += x_ndim;
+
+  // Step1: Build Einsum Notation
+  // e.g. axis=1, out_grad: abc -> x: a1c, index: abc, x_grad: a1c
+  std::string alphabet = "abcdefghijklmnopqrstuvwxyz";
+  std::string out_grad_axes =
+      GetBroadcastAxes(out_grad_ndim, out_grad_ndim, alphabet);
+  std::string index_axes = out_grad_axes;
+  std::string x_axes = index_axes;
+  x_axes.replace(axis, 1, "1");
+  for (int i = 0; i < index_ndim; ++i) {
+    // Sizes differ on a non-`axis` dim: mark it "1" in every notation.
+    if (i != axis && x_shape[i] != index_shape[i]) {
+      x_axes.replace(i, 1, "1");
+      index_axes.replace(i, 1, "1");
+      out_grad_axes.replace(i, 1, "1");
+    }
+  }
+  std::string x_grad_axes = x_axes;
+
+  // Step2: Sharding Propagation
+  // Step2.1: Merge input shardings
+  std::vector<int64_t> out_grad_dims_mapping(out_grad_dims_mapping_src);
+  std::unordered_map<std::string, int64_t> axis_to_dim_map =
+      ShardingMergeForTensors({{out_grad_axes, out_grad_dims_mapping}});
+
+  // step2.2: Infer input dims mapping from merged input dims mapping
+  std::vector<int64_t> index_dims_mapping =
+      GetDimsMappingForAxes(index_axes, axis_to_dim_map);
+  auto index_dist_attr_dst = CopyTensorDistAttrForOutput(index_dist_attr_src);
+  index_dist_attr_dst.set_dims_mapping(index_dims_mapping);
+
+  // index_axes and out_grad_axes receive identical edits above, so out_grad
+  // reuses the mapping computed for index.
+  auto out_grad_dist_attr_dst =
+      CopyTensorDistAttrForOutput(out_grad_dist_attr_src);
+  out_grad_dist_attr_dst.set_dims_mapping(index_dims_mapping);
+
+  auto x_dist_attr_dst = CopyTensorDistAttrForOutput(x_dist_attr_src);
+  x_dist_attr_dst.set_dims_mapping(
+      GetDimsMappingForAxes(x_axes, axis_to_dim_map));
+
+  auto x_grad_dist_attr_dst = CopyTensorDistAttrForOutput(x_dist_attr_src);
+  x_grad_dist_attr_dst.set_dims_mapping(
+      GetDimsMappingForAxes(x_grad_axes, axis_to_dim_map));
+
+  VLOG(4) << "out_grad";
+  VLOG(4) << "dist_attr: [" << out_grad_dist_attr_dst.to_string() << "]";
+  VLOG(4) << "index";
+  VLOG(4) << "dist_attr: [" << index_dist_attr_dst.to_string() << "]";
+  VLOG(4) << "x";
+  VLOG(4) << "dist_attr: [" << x_dist_attr_dst.to_string() << "]";
+  VLOG(4) << "x_grad";
+  VLOG(4) << "dist_attr: [" << x_grad_dist_attr_dst.to_string() << "]";
+
+  return {{x_dist_attr_dst, index_dist_attr_dst, out_grad_dist_attr_dst},
+          {x_grad_dist_attr_dst}};
+}
+}  // namespace phi::distributed
diff --git a/paddle/phi/infermeta/spmd_rules/take_along_axis.h b/paddle/phi/infermeta/spmd_rules/take_along_axis.h
@@ -0,0 +1,32 @@
+/* Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/phi/core/distributed/auto_parallel/dist_meta_tensor.h"
+#include "paddle/phi/core/distributed/type_defs.h"
+
+namespace phi {
+namespace distributed {
+SpmdInfo TakeAlongAxisInferSpmd(const DistMetaTensor& x,
+                                const DistMetaTensor& index,
+                                int axis);  // rule for take_along_axis
+// Rule for take_along_axis_grad: same arguments plus the output gradient.
+SpmdInfo TakeAlongAxisGradInferSpmd(const DistMetaTensor& x,
+                                    const DistMetaTensor& index,
+                                    const DistMetaTensor& out_grad,
+                                    int axis);
+
+}  // namespace distributed
+}  // namespace phi
diff --git a/paddle/phi/ops/yaml/backward.yaml b/paddle/phi/ops/yaml/backward.yaml
@@ -3577,6 +3577,7 @@
   infer_meta :
     func : UnchangedInferMeta
     param : [arr]
+    spmd_rule : TakeAlongAxisGradInferSpmd
   kernel :
     func : take_along_axis_grad
   backward : take_along_axis_double_grad
diff --git a/paddle/phi/ops/yaml/ops.yaml b/paddle/phi/ops/yaml/ops.yaml
@@ -5229,6 +5229,7 @@
   infer_meta :
     func : TakeAlongAxisInferMeta
     param : [arr, indices, axis]
+    spmd_rule : TakeAlongAxisInferSpmd
   kernel :
     func : take_along_axis
     data_type : arr
diff --git a/test/auto_parallel/spmd_rules/CMakeLists.txt b/test/auto_parallel/spmd_rules/CMakeLists.txt
@@ -61,6 +61,7 @@ if(WITH_DISTRIBUTE)
     py_test_modules(test_fused_gemm_epilogue_rule MODULES
                     test_fused_gemm_epilogue_rule)
     py_test_modules(test_gelu_rule MODULES test_gelu_rule)
+    py_test_modules(test_take_along_axis_rule MODULES test_take_along_axis_rule)
   endif()
   # End of unittests WITH single card WITHOUT timeout
 
diff --git a/test/auto_parallel/spmd_rules/test_take_along_axis_rule.py b/test/auto_parallel/spmd_rules/test_take_along_axis_rule.py