Skip to content

Commit 3809d3c

Browse files
committed
modified: reduce_sum/reduce_mean to use reduce_op.cu.h
1 parent 25ba21c commit 3809d3c

File tree

4 files changed

+6
-86
lines changed

4 files changed

+6
-86
lines changed

paddle/fluid/operators/reduce_ops/reduce_all_op.cu

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
#include "paddle/fluid/operators/reduce_ops/reduce_all_op.h"
1616
#include "paddle/fluid/operators/reduce_ops/reduce_functor_op.h"
1717

18-
// reduce_prod
1918
REGISTER_OP_CUDA_KERNEL(
2019
reduce_all,
2120
ops::ReduceCudaKernel<bool, paddle::operators::CustomLogicalAnd>);

paddle/fluid/operators/reduce_ops/reduce_any_op.cu

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
#include "paddle/fluid/operators/reduce_ops/reduce_functor_op.h"
1717
#include "paddle/fluid/operators/reduce_ops/reduce_op.h"
1818

19-
// reduce_prod
2019
REGISTER_OP_CUDA_KERNEL(
2120
reduce_any,
2221
ops::ReduceCudaKernel<bool, paddle::operators::CustomLogicalOr>);

paddle/fluid/operators/reduce_ops/reduce_mean_op.cu

Lines changed: 3 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -13,52 +13,18 @@
1313
// limitations under the License.
1414

1515
#include <vector>
16-
#include "paddle/fluid/operators/reduce_ops/cub_reduce.h"
16+
#include "paddle/fluid/operators/reduce_ops/reduce_functor_op.h"
1717
#include "paddle/fluid/operators/reduce_ops/reduce_mean_op.h"
18+
#include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h"
1819

1920
namespace paddle {
2021
namespace operators {
2122

22-
template <typename T>
23-
struct DivideFunctor {
24-
HOSTDEVICE explicit inline DivideFunctor(int n) : n_inv((T)(1.0 / n)) {}
25-
26-
HOSTDEVICE inline T operator()(const T& x) const { return x * n_inv; }
27-
28-
private:
29-
T n_inv;
30-
};
31-
3223
template <typename T>
3324
class ReduceMeanKernel : public framework::OpKernel<T> {
3425
public:
3526
void Compute(const framework::ExecutionContext& context) const override {
36-
bool reduce_all = context.Attr<bool>("reduce_all");
37-
auto* input = context.Input<Tensor>("X");
38-
auto* output = context.Output<Tensor>("Out");
39-
40-
auto dims = context.Attr<std::vector<int>>("dim");
41-
bool keep_dim = context.Attr<bool>("keep_dim");
42-
43-
std::vector<int> reduce_dims;
44-
if (reduce_all) {
45-
reduce_dims.resize(input->dims().size());
46-
for (int i = 0; i < reduce_dims.size(); ++i) reduce_dims[i] = i;
47-
} else {
48-
for (auto e : dims) {
49-
reduce_dims.push_back(e >= 0 ? e : e + input->dims().size());
50-
}
51-
}
52-
53-
int reduce_num = 1;
54-
for (int i = 0; i < reduce_dims.size(); ++i) {
55-
reduce_num *= input->dims()[reduce_dims[i]];
56-
}
57-
58-
auto stream = context.cuda_device_context().stream();
59-
TensorReduce<T, T, cub::Sum, DivideFunctor<T>>(
60-
*input, output, reduce_dims, static_cast<T>(0), cub::Sum(),
61-
DivideFunctor<T>(reduce_num), stream);
27+
Reduce<T, CustomMean>(context);
6228
}
6329
};
6430

paddle/fluid/operators/reduce_ops/reduce_sum_op.cu

Lines changed: 3 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -11,62 +11,18 @@
1111
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
14-
15-
#include "paddle/fluid/operators/reduce_ops/cub_reduce.h"
14+
#include "paddle/fluid/operators/reduce_ops/reduce_functor_op.h"
15+
#include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h"
1616
#include "paddle/fluid/operators/reduce_ops/reduce_sum_op.h"
1717

1818
namespace paddle {
1919
namespace operators {
2020

21-
template <typename Tout>
22-
struct IdentityFunctor {
23-
HOSTDEVICE explicit inline IdentityFunctor() {}
24-
25-
template <typename U>
26-
HOSTDEVICE inline Tout operator()(const U& x) const {
27-
return static_cast<Tout>(x);
28-
}
29-
};
30-
3121
template <typename T>
3222
class ReduceSumKernel : public framework::OpKernel<T> {
3323
public:
3424
void Compute(const framework::ExecutionContext& context) const override {
35-
bool reduce_all = context.Attr<bool>("reduce_all");
36-
auto* input = context.Input<Tensor>("X");
37-
auto* output = context.Output<Tensor>("Out");
38-
auto out_dtype = context.Attr<int>("out_dtype");
39-
40-
auto dims = context.Attr<std::vector<int>>("dim");
41-
bool keep_dim = context.Attr<bool>("keep_dim");
42-
43-
std::vector<int> reduce_dims;
44-
if (reduce_all) {
45-
reduce_dims.resize(input->dims().size());
46-
for (int i = 0; i < reduce_dims.size(); ++i) reduce_dims[i] = i;
47-
} else {
48-
for (auto e : dims) {
49-
reduce_dims.push_back(e >= 0 ? e : e + input->dims().size());
50-
}
51-
}
52-
53-
int reduce_num = 1;
54-
for (int i = 0; i < reduce_dims.size(); ++i) {
55-
reduce_num *= input->dims()[reduce_dims[i]];
56-
}
57-
58-
auto stream = context.cuda_device_context().stream();
59-
if (out_dtype >= 0) {
60-
framework::VisitDataTypeSmall(
61-
static_cast<framework::proto::VarType::Type>(out_dtype),
62-
TensorReduceFunctor<T, cub::Sum, IdentityFunctor>(
63-
*input, output, reduce_dims, static_cast<double>(0.0), cub::Sum(),
64-
stream));
65-
} else {
66-
TensorReduce<T, T, cub::Sum, IdentityFunctor<T>>(
67-
*input, output, reduce_dims, static_cast<T>(0), cub::Sum(),
68-
IdentityFunctor<T>(), stream);
69-
}
25+
Reduce<T, CustomSum>(context);
7026
}
7127
};
7228

0 commit comments

Comments
 (0)