Conversation
paddle/operators/transpose_op.cu
Outdated
int* host_buffer_data = host_buffer.mutable_data<int>(buffer_dims, cpu_place);

auto offset_buffer =
    memory::Alloc(context.GetPlace(), ndims * 3 * sizeof(int));
The interface is misused here: context.GetPlace() returns a Place, which is a boost::variant<CPUPlace, GPUPlace>, while memory::Alloc needs a concrete GPUPlace or CPUPlace.
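A minimal sketch of the fix (assuming this CUDA kernel only ever runs with a GPUPlace):

// Extract the concrete place from the boost::variant before allocating.
auto gpu_place = boost::get<platform::GPUPlace>(context.GetPlace());
auto* offset_buffer =
    static_cast<int*>(memory::Alloc(gpu_place, ndims * 3 * sizeof(int)));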
paddle/operators/transpose_op.cu
Outdated
}

memory::Copy(gpu_place, offset_buffer, cpu_place, host_buffer_data,
             ndims * 3 * sizeof(int));
When doing the memory copy, the corresponding CUDA stream needs to be specified:

template <typename DstPlace, typename SrcPlace>
void Copy(DstPlace, void* dst, SrcPlace, const void* src, size_t num,
          cudaStream_t stream);

The CUDA stream can be obtained from the context.
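So the call could become something like this sketch (the device_context accessor and CUDADeviceContext::stream() are assumptions about the framework API at the time):

// Fetch the stream from the CUDA device context and pass it to Copy.
auto& cuda_ctx = static_cast<const platform::CUDADeviceContext&>(
    context.device_context());
memory::Copy(gpu_place, offset_buffer, cpu_place, host_buffer_data,
             ndims * 3 * sizeof(int), cuda_ctx.stream());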
Xreki
left a comment
- Merge the latest develop first
- Need more unit test cases. In particular, we need a unit test with a large Tensor to exercise the CUDA kernel.
paddle/operators/transpose_op.cc
Outdated
#include "paddle/operators/transpose_op.h"
#include <vector>
#include "paddle/framework/ddim.h"
paddle/operators/transpose_op.cc
Outdated
PADDLE_ENFORCE_EQ(
    in_dim_size, axis_size,
    "the input tensor dimensions should be equal to the axis size");
Print the values of in_dim_size and axis_size in the error message.
input tensor dimensions -> input tensor's dimension
axis size -> axis's size
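A sketch of the suggested message with both values included (this matches the format-argument style PADDLE_ENFORCE_EQ supports, as seen in the later diff):

PADDLE_ENFORCE_EQ(in_dim_size, axis_size,
                  "the input tensor's dimension(%d) should be equal to the "
                  "axis's size(%d)",
                  in_dim_size, axis_size);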
paddle/operators/transpose_op.cc
Outdated
std::vector<int> axis_sorted(axis);
std::sort(axis_sorted.begin(), axis_sorted.end());
for (size_t i = 0; i < axis_sorted.size(); i++) {
  PADDLE_ENFORCE_EQ(axis_sorted[i], (int)i,
paddle/operators/transpose_op.cc
Outdated
for (size_t i = 0; i < axis_sorted.size(); i++) {
  PADDLE_ENFORCE_EQ(axis_sorted[i], (int)i,
                    "the sorted axis should be [0, 1, ... dims - 1], "
                    "the dims equals to the input tensor dimensions");
where the dims is the axis's size.
paddle/operators/transpose_op.cc
Outdated
framework::OpAttrChecker *op_checker)
    : OpProtoAndCheckerMaker(proto, op_checker) {
  AddInput("X", "The input of transpose op");
  AddOutput("Out", "The output of transpose op");
Please refine the comments according to https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/name_convention.md.
paddle/operators/transpose_op.h
Outdated
}

template <typename Place, typename T, int Dims>
void DoTranspose(const framework::ExecutionContext& context,
Since this function calls Eigen to do the transpose, how about renaming it to EigenCpuTranspose?
paddle/operators/transpose_op.h
Outdated
out->mutable_data<T>(context.GetPlace());

auto axis_temp = context.Attr<std::vector<int>>("axis");
std::vector<int> axis(axis_temp);
How about: axis_temp -> axis and axis -> axis_grad?
There is no grad for axis.
paddle/operators/transpose_op.h
Outdated
public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* in = context.Input<framework::Tensor>(framework::GradVarName("Out"));
    auto* out = context.Output<framework::Tensor>(framework::GradVarName("X"));
in -> in_grad, out -> out_grad
paddle/operators/transpose_op.h
Outdated
case 5:
  DoTranspose<Place, T, 5>(context, *in, *out, axis);
  break;
default:
When ndims is 1, call NaiveCpuTranspose?
When ndims > 5, call NaiveCpuTranspose.
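A sketch of the dispatch these two comments suggest (NaiveCpuTranspose is the PR's own fallback; the exact call is assumed):

switch (ndims) {
  // cases 2 through 5 call DoTranspose<Place, T, N> as above
  case 1:
  default:  // also covers ndims > 5
    NaiveCpuTranspose<Place, T>(context, *in, *out, axis);
    break;
}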
paddle/operators/transpose_op.cu
Outdated
namespace operators {

template <typename T>
__global__ void transpose_kernel(int nthreads, const T* in_data, T* out_data,
I think this CUDA function can be renamed to NaiveCUDATranspose; we may need an optimized implementation in the future.
paddle/operators/transpose_op.cc
Outdated
protected:
  void InferShape(const framework::InferShapeContext &ctx) const override {
    auto in_dim = ctx.Input<Tensor>("X")->dims();
The checks in InferShape need to be comprehensive; check that the inputs are NOT_NULL, for example: https://github.com/PaddlePaddle/Paddle/pull/4086/files#diff-1fcd5ee1c1e63ed40789a0e60fdb1bf6R29
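For example (a sketch mirroring the linked check):

PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"),
                        "Input(X) of TransposeOp should not be null");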
paddle/operators/transpose_op.cc
Outdated
for (size_t i = 0; i < axis.size(); i++) {
  out_dim[i] = in_dim[axis[i]];
}
ctx.Output<Tensor>("Out")->Resize(out_dim);
InferShape now needs to use LoDTensor for the Output: Output<framework::LoDTensor>
paddle/operators/transpose_op.cc
Outdated
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
                        "Input(Out@GRAD) should not be null");
auto x_dims = ctx.Input<Tensor>("X")->dims();
auto *x_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
Output<framework::LoDTensor>
paddle/operators/transpose_op.cu
Outdated
namespace operators {

template <typename T>
__global__ void transpose_kernel(int nthreads, const T* in_data, T* out_data,
This kernel is too inefficient. Consider Eigen::shuffle instead, which supports both CPU and GPU.
Have deleted this kernel and switched to the Eigen interface.
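For reference, a minimal sketch of the Eigen-based version (assuming framework::EigenTensor and ExecutionContext::GetEigenDevice, as used by other operators in the codebase at the time):

template <typename Place, typename T, int Rank>
void EigenTranspose(const framework::ExecutionContext& context,
                    const framework::Tensor& in, framework::Tensor& out,
                    const std::vector<int>& axis) {
  // Build the compile-time-rank permutation for Eigen's shuffle.
  Eigen::array<int, Rank> permute;
  for (int i = 0; i < Rank; i++) {
    permute[i] = axis[i];
  }
  auto eigen_in = framework::EigenTensor<T, Rank>::From(in);
  auto eigen_out = framework::EigenTensor<T, Rank>::From(out);
  // The same expression runs on CPU or GPU depending on Place.
  auto& dev = context.GetEigenDevice<Place>();
  eigen_out.device(dev) = eigen_in.shuffle(permute);
}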
  break;
case 5:
  DoTranspose<Place, T, 5>(context, *in, *out, axis);
  break;
Supporting up to rank 5 seems good enough; I think we can drop NaiveCpuTranspose and use Eigen::shuffle directly. Doesn't that support GPU? For rank > 5:
PADDLE_THROW("Tensors with rank at most 6 are supported").
OK, if we don't support tensors with rank greater than 5, that works and saves a lot of code, including NaiveCpuTranspose and the GPU kernel.
import numpy as np
from gradient_checker import GradientChecker
from op_test_util import OpTestMeta
from paddle.v2.framework.op import Operator

self.check_grad(op, inputs, set(["X"]), "Out", max_relative_error=0.5)
Xreki
left a comment
I think it's mostly OK now overall. The main remaining work is polishing the comments for the inputs, outputs, and the Op itself, since documentation will be generated from them later.
auto axis = context.Attr<std::vector<int>>("axis");
int ndims = axis.size();
switch (ndims) {
  case 1:
Does this op support 1-D input? If it does, this case should be a copy operation; if it doesn't, InferShape should check for it with PADDLE_ENFORCE.
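If 1-D input is supported, the case 1 branch could be a plain copy, along these lines (a sketch; the Tensor::CopyFrom call is an assumption):

case 1:
  // A 1-D transpose is the identity, so just copy input to output.
  out->CopyFrom<T>(*in, context.GetPlace());
  break;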
paddle/operators/transpose_op.cc
Outdated
protected:
  void InferShape(const framework::InferShapeContext &ctx) const override {
    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Input"),
                            "Input(Input) should not be null");
paddle/operators/transpose_op.cc
Outdated
: OpProtoAndCheckerMaker(proto, op_checker) {
  AddInput(
      "Input",
      "(Tensor)The input tensor, tensors with rank at most 7 are supported");
paddle/operators/transpose_op.cc
Outdated
    ctx.Input<Tensor>(framework::GradVarName("Output"))->dims();
auto output_dims = ctx.Input<Tensor>("Output")->dims();

PADDLE_ENFORCE(output_grad_dims == output_dims,
paddle/operators/transpose_op.h
Outdated
switch (ndims) {
  case 1:
    break;
paddle/operators/transpose_op.cc
Outdated
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Input"),
                        "Input(Input) should not be null");
auto input_dim = ctx.Input<Tensor>("Input")->dims();
auto axis = ctx.Attr<std::vector<int>>("axis");
Writing std::vector<int> is not complex. Don't overuse auto; it may confuse readers about a variable's type.
paddle/operators/transpose_op.cc
Outdated
| "Input(Input) should not be null"); | ||
| auto input_dim = ctx.Input<Tensor>("Input")->dims(); | ||
| auto axis = ctx.Attr<std::vector<int>>("axis"); | ||
| size_t input_dim_size = input_dim.size(); |
The size of a tensor's dimensions is called rank.
paddle/operators/transpose_op.cc
Outdated
size_t axis_size = axis.size();

PADDLE_ENFORCE_EQ(input_dim_size, axis_size,
                  "the input tensor's dimension(%d) "
"the input tensor's rank(%d) "
  PADDLE_ENFORCE_EQ(axis_sorted[i], static_cast<int>(i),
                    "the sorted axis should be [0, 1, ... dims - 1], "
                    "where the dims is the axis's size");
}
I think traversing axis and recording how many times each number occurs is shorter and faster than the current implementation:
std::vector<int> count(axis_size, 0);
for (size_t i = 0; i < axis.size(); i++) {
  PADDLE_ENFORCE(axis[i] < axis_size && ++count[axis[i]] == 1,
                 "Attribute axis should be a permutation of [0, 1, ... dims - 1], "
                 "where the dims is the axis's size");
}
paddle/operators/transpose_op.cc
Outdated
AddAttr<std::vector<int>>(
    "axis",
    "(vector<int>)a list of values, and the size of the list should be "
    "the same with the input tensor dimensions, the tensor will "
'the input tensor's rank'
paddle/operators/transpose_op.cc
Outdated
    ctx.Output<framework::LoDTensor>(framework::GradVarName("Input"));

auto output_grad_dims =
    ctx.Input<Tensor>(framework::GradVarName("Output"))->dims();
Assert that Input(Output@GRAD) is not nullptr before taking its dims.
paddle/operators/transpose_op.cc
Outdated
PADDLE_ENFORCE(output_grad_dims == output_dims,
               "Output@GRAD dims must equal to Input(Input) dims");

input_grad->Resize(input_dims);
input_grad can be nullptr, which means it is not needed for backward, so we don't need to resize or compute it.
paddle/operators/transpose_op.h
Outdated
    context.Input<framework::Tensor>(framework::GradVarName("Output"));
auto* input_grad =
    context.Output<framework::Tensor>(framework::GradVarName("Input"));
input_grad->mutable_data<T>(context.GetPlace());
Check input_grad first; if it is nullptr, we don't need to do the following computation.
An example: https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/cos_sim_op.h#L104
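A sketch of the suggested guard, mirroring the cos_sim example:

auto* input_grad =
    context.Output<framework::Tensor>(framework::GradVarName("Input"));
// Input@GRAD may not be needed by the backward pass; skip all work if so.
if (input_grad == nullptr) return;
input_grad->mutable_data<T>(context.GetPlace());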
paddle/operators/transpose_op.h
Outdated
std::vector<int> axis(axis_temp);

for (size_t i = 0; i < axis.size(); i++) {
  axis[axis_temp[i]] = i;
How about renaming axis_temp to axis and the current axis to reversed_axis?
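With that renaming, the inverse-permutation setup would read (a sketch):

std::vector<int> axis = context.Attr<std::vector<int>>("axis");
std::vector<int> reversed_axis(axis.size());
for (size_t i = 0; i < axis.size(); i++) {
  // Invert the forward permutation: output position axis[i] maps back to i.
  reversed_axis[axis[i]] = static_cast<int>(i);
}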
class TestCase4(TestTransposeOp):
    def initTestCase(self):
        self.shape = (2, 3, 4, 5, 6, 1)
        self.axis = (4, 2, 3, 1, 0, 5)
There should be another test checking whether our Op throws an exception correctly. However, our framework can't support such a test right now, so I'm leaving a comment here as a TODO reminder. I have created an issue about this: #4173
…ent when input_grad is null
paddle/operators/transpose_op.cc
Outdated
AddInput(
    "Input",
    "(Tensor)The input tensor, tensors with rank at most 6 are supported");
AddOutput("Output", "(Tensor)The output tensor");
The names of a single in/out Op's input and output should be X and Out respectively.
See the document: https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/name_convention.md
fix #4163