Skip to content

Commit 2cafe8a

Browse files
committed
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into fix_class_center_sample
2 parents 310cb7f + 39403d1 commit 2cafe8a

File tree

5 files changed

+25
-34
lines changed

5 files changed

+25
-34
lines changed

paddle/fluid/pir/serialize_deserialize/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ endif()
1313

1414
file(GLOB_RECURSE YAML_PATCH_FILES "*.yaml")
1515
# change pir version when new patches are added
16-
add_definitions(-DDEVELOP_VERSION=2)
16+
add_definitions(-DDEVELOP_VERSION=0)
1717
add_definitions(-DRELEASE_VERSION=2)
1818
set(TEMPLATE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/patch/template.h.in)
1919
set(PATCH_HEADER ${CMAKE_CURRENT_BINARY_DIR}/patch/patch.h)

paddle/fluid/pir/serialize_deserialize/patch/Readme.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ type_patches:
146146
- type : pir::Int64Attribute # 新增属性类型为Int64Attribute
147147
- data : 0 # 新增属性默认值
148148
```
149+
更多patch配置案例可以参考相关单测,在`Paddle/test/cpp/pir/serialize_deserialize` 目录下。
149150

150151
## pir_version 配置说明
151152
### C++端版本号管理与CMake配置
@@ -162,11 +163,12 @@ type_patches:
162163
│ ├─0.yaml
163164
│ └─1.yaml
164165
```
165-
- RELEASE_VERSION 为已发布的版本中PIR版本号,即为patch yaml文件名的最大值。
166-
- DEVELOP_VERSION 为当前develop分支下的PIR版本号,若存在未发布的新增patch,配置在`0.yaml`,且当前的develop pir 版本号为0
166+
- RELEASE_VERSION 为已发布的版本中PIR版本号,即为patch yaml文件名的最大值,每次新版本发布且存在新增patch时,`RELEASE_VERSION + 1`,若无新增patch则无需修改
167+
- DEVELOP_VERSION 为当前develop分支下的PIR版本号,若需要新增patch,配置在`0.yaml`中(没有则说明当前为新版本发布后第一次新增patch,需要新建文件),并将`-DDEVELOP_VERSION`设置为0
167168

168169
- ReadModule和WriteModule参数中的pir_version设为默认值,可以不用传递。pir_version 参数默认值为-1,进入函数后会获取CMake中配置的当前的PIR版本号。
169170

171+
- 完整修改配置流程可以参考PR:https://github.com/PaddlePaddle/Paddle/pull/72751(修改DEVELOP_VERSION),https://github.com/PaddlePaddle/Paddle/pull/72639(新增patch yaml)
170172
### Python端
171173
- Paddle的主版本号定义在Python端,与PIR version不产生关联。Python端不再需要获取和传入pir_version,直接使用默认值即可。
172174
### Paddle发版要求

paddle/fluid/pybind/eager_method.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1414,7 +1414,8 @@ static PyObject* tensor_method_set_underline_tensor(TensorObject* self,
14141414
if (self->tensor.is_dense_tensor()) {
14151415
auto* dst_tensor =
14161416
static_cast<phi::DenseTensor*>(self->tensor.impl().get());
1417-
if (!dst_tensor->meta().is_contiguous() ||
1417+
if (self->tensor.has_allocation() &&
1418+
!dst_tensor->meta().is_contiguous() ||
14181419
!src_tensor->meta().is_contiguous()) {
14191420
VLOG(8) << "set_tensor() method , src or dst tensor is not contiguous";
14201421
if (!FLAGS_use_stride_kernel) {

paddle/phi/kernels/gpu/cross_entropy_kernel.cu

Lines changed: 9 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -713,14 +713,16 @@ template <typename T>
713713
static void SoftmaxWithCrossEntropySoftLabel(const GPUContext& dev_ctx,
714714
const int rank,
715715
const int axis,
716-
const T* logits_data,
716+
const DenseTensor& logits,
717717
const T* labels_data,
718-
T* softmax_data,
718+
DenseTensor* softmax,
719719
T* loss_data,
720720
int N,
721721
int dim,
722722
int D) {
723723
constexpr int kMaxBlockDim = 512;
724+
auto* logits_data = logits.data<T>();
725+
auto* softmax_data = softmax->data<T>();
724726
int64_t block_dim = dim >= kMaxBlockDim
725727
? kMaxBlockDim
726728
: (1 << static_cast<int>(std::log2(dim)));
@@ -762,13 +764,7 @@ static void SoftmaxWithCrossEntropySoftLabel(const GPUContext& dev_ctx,
762764
GPUDNNDataLayout layout = GPUDNNDataLayout::kNCHW;
763765
#ifdef PADDLE_WITH_HIP
764766
miopenTensorDescriptor_t descp = desc.descriptor<T>(layout, tensor_dims);
765-
#else
766-
cudnnTensorDescriptor_t descp = desc.descriptor<T>(layout, tensor_dims);
767-
#endif
768-
769767
auto handle = dev_ctx.cudnn_handle();
770-
771-
#ifdef PADDLE_WITH_HIP
772768
auto mode = axis == rank - 1 ? MIOPEN_SOFTMAX_MODE_INSTANCE
773769
: MIOPEN_SOFTMAX_MODE_CHANNEL;
774770
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxForward_V2(
@@ -782,18 +778,8 @@ static void SoftmaxWithCrossEntropySoftLabel(const GPUContext& dev_ctx,
782778
MIOPEN_SOFTMAX_LOG,
783779
mode));
784780
#else
785-
auto mode = axis == rank - 1 ? CUDNN_SOFTMAX_MODE_INSTANCE
786-
: CUDNN_SOFTMAX_MODE_CHANNEL;
787-
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSoftmaxForward(
788-
handle,
789-
CUDNN_SOFTMAX_LOG,
790-
mode,
791-
phi::backends::gpu::CudnnDataType<T>::kOne(),
792-
descp,
793-
logits_data,
794-
phi::backends::gpu::CudnnDataType<T>::kZero(),
795-
descp,
796-
softmax_data));
781+
SoftmaxForwardCUDAKernelDriver<T, true>(dev_ctx, logits, axis, softmax);
782+
softmax_data = softmax->data<T>();
797783
#endif
798784

799785
const int kDimLog2 = static_cast<int>(Log2Ceil(dim));
@@ -1170,7 +1156,7 @@ static void SoftmaxWithCrossEntropyHardLabel(const GPUContext& dev_ctx,
11701156
VLOG(7) << "rank=" << rank << ", axis = " << axis << ", N = " << N
11711157
<< ", dim = " << dim << ", D = " << D;
11721158
auto* logits_data = logits.data<T>();
1173-
auto* softmax_data = dev_ctx.template Alloc<T>(softmax);
1159+
auto* softmax_data = softmax->data<T>();
11741160
auto stream = dev_ctx.stream();
11751161
constexpr int max_dim = 320;
11761162
if (D == 1) {
@@ -1216,8 +1202,6 @@ static void SoftmaxWithCrossEntropyHardLabel(const GPUContext& dev_ctx,
12161202
MIOPEN_SOFTMAX_LOG,
12171203
mode));
12181204
#else
1219-
auto mode = axis == rank - 1 ? CUDNN_SOFTMAX_MODE_INSTANCE
1220-
: CUDNN_SOFTMAX_MODE_CHANNEL;
12211205
SoftmaxForwardCUDAKernelDriver<T, true>(dev_ctx, logits, axis, softmax);
12221206
softmax_data = softmax->data<T>();
12231207
#endif
@@ -1352,14 +1336,13 @@ void CrossEntropyWithSoftmaxCUDAKernel(const GPUContext& dev_ctx,
13521336
}
13531337

13541338
if (soft_label) {
1355-
auto* logits_data = logits.data<T>();
13561339
auto* labels_data = label.data<T>();
13571340
SoftmaxWithCrossEntropySoftLabel<T>(dev_ctx,
13581341
rank,
13591342
axis_v,
1360-
logits_data,
1343+
logits,
13611344
labels_data,
1362-
softmax_data,
1345+
softmax,
13631346
loss_data,
13641347
n,
13651348
axis_dim,

python/paddle/tensor/manipulation.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6680,10 +6680,15 @@ def take_along_axis(
66806680
)
66816681
axis = non_negative_axis(arr, axis)
66826682
if broadcast:
6683-
broadcast_shape = infer_broadcast_shape(arr, indices, axis)
6684-
if not broadcast_shape:
6685-
# if indices matrix have larger size than arr, arr should broadcast into indices shape.
6686-
broadcast_shape = indices.shape
6683+
broadcast_shape_list = list(arr.shape)
6684+
for i in range(len(arr.shape)):
6685+
if indices.shape[i] == 0 or arr.shape[i] == 0:
6686+
broadcast_shape_list[i] = 0
6687+
else:
6688+
broadcast_shape_list[i] = max(arr.shape[i], indices.shape[i])
6689+
broadcast_shape_list[axis] = list(indices.shape)[axis]
6690+
broadcast_shape = tuple(broadcast_shape_list)
6691+
66876692
indices = paddle.broadcast_to(indices, broadcast_shape)
66886693
broadcast_shape_list = list(broadcast_shape)
66896694
broadcast_shape_list[axis] = list(arr.shape)[axis]

0 commit comments

Comments
 (0)