Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 67 additions & 8 deletions paddle/cinn/ast_gen_ius/ast_gen.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "paddle/cinn/optim/replace_var_with_expr.h"

PD_DECLARE_bool(cinn_new_group_scheduler);
PD_DECLARE_bool(group_schedule_tiling_first);
PD_DECLARE_bool(cinn_bucket_compile);

namespace cinn {
Expand Down Expand Up @@ -93,9 +94,46 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
std::vector<ir::Expr> iter_values;
// reduce body and reduce init schedule block should have different objects
// for same axis so we re-create objects
VLOG(4) << "FLAGS_group_schedule_tiling_first = "
<< FLAGS_group_schedule_tiling_first;
std::vector<Var> axis_vars = cinn::common::GenDefaultAxis(axis_len);
const std::vector<ir::Var>& reduce_axis = tensor->reduce_axis;
// Compute the positions of the reduce axes among the load indices of the
// reduce body: for each index expression of the Load inside the Reduce,
// record its position if it matches one of tensor->reduce_axis.
// Returns an empty vector when the compute body is not a Reduce.
const auto reduce_axis_position = [&reduce_axis,
                                   &tensor]() -> std::vector<int> {
  VLOG(4) << "start calculus reduce_axis_position: ";
  std::vector<int> res;
  auto fn_body = tensor->operation.ptr()->as<ir::ComputeOp>()->body[0];
  if (fn_body.defined() && fn_body.As<ir::Reduce>()) {
    auto& reduce_body =
        fn_body.As<ir::Reduce>()->body;  // reduce body is a tensor store.
    // NOTE(review): assumes the reduce body is a Load expression — confirm;
    // As<ir::Load>() would return nullptr otherwise.
    auto& load_indices = reduce_body.As<ir::Load>()->indices;
    int position = -1;
    for (auto& obj : load_indices) {
      position += 1;
      for (auto& reduce_var : reduce_axis) {
        if (obj.as_var_ref() == reduce_var) {
          res.push_back(position);
        }
      }
    }
    for (auto i : res) {
      VLOG(4) << "reduce axis position is " << i;
    }
  }
  // BUGFIX: the original returned only inside the if-branch; falling off the
  // end of a value-returning lambda is undefined behavior. Always return.
  return res;
}();
for (int i = 0; i < shape.size(); ++i) {
if (FLAGS_cinn_new_group_scheduler && shape[i] == Expr(1)) {
if (FLAGS_group_schedule_tiling_first &&
std::find(reduce_axis_position.begin(),
reduce_axis_position.end(),
i) != reduce_axis_position.end()) {
// if tiling first, we need to replace the reduce axis with 0, but don't
// deal with the non-reduce axis
optim::ReplaceVarWithExpr(&init_body, axis[i], Expr(0));
continue;
}
if (!FLAGS_group_schedule_tiling_first &&
FLAGS_cinn_new_group_scheduler && shape[i] == Expr(1)) {
optim::ReplaceVarWithExpr(&init_body, axis[i], Expr(0));
continue;
}
Expand All @@ -105,7 +143,7 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
/*is_reduce = */ false));
optim::ReplaceVarWithExpr(&init_body, axis[i], block_vars.back());
axis_vars[i]->is_reduce_axis = false;
if (shape[i] == Expr(1)) {
if (!FLAGS_group_schedule_tiling_first && shape[i] == Expr(1)) {
iter_values.push_back(Expr(0));
} else {
iter_values.push_back(axis_vars[i]);
Expand All @@ -117,7 +155,6 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
block_vars, {}, {}, reduce_init_name, init_body));

// For the remaining reduce axis, make reduce body
const std::vector<ir::Var>& reduce_axis = tensor->reduce_axis;
ir::Expr reduce_body =
ConvertReduceBody(tensor->body(), tensor, axis_exprs);
// create schedule block itervars, i0,i1...
Expand All @@ -127,7 +164,17 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
// for same axis so we re-create objects
std::vector<Var> reduce_axis_vars = cinn::common::GenDefaultAxis(axis_len);
for (int i = 0; i < shape.size(); ++i) {
if (FLAGS_cinn_new_group_scheduler && shape[i] == Expr(1)) {
if (FLAGS_group_schedule_tiling_first &&
std::find(reduce_axis_position.begin(),
reduce_axis_position.end(),
i) != reduce_axis_position.end()) {
// if tiling first, we need to replace the reduce axis with 0, but don't
// deal with the non-reduce axis
optim::ReplaceVarWithExpr(&reduce_body, axis[i], Expr(0));
continue;
}
if (!FLAGS_group_schedule_tiling_first &&
FLAGS_cinn_new_group_scheduler && shape[i] == Expr(1)) {
optim::ReplaceVarWithExpr(&reduce_body, axis[i], Expr(0));
continue;
}
Expand All @@ -136,7 +183,7 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
cinn::UniqName("i" + std::to_string(i)),
/*is_reduce = */ false));
reduce_axis_vars[i]->is_reduce_axis = false;
if (shape[i] == Expr(1)) {
if (!FLAGS_group_schedule_tiling_first && shape[i] == Expr(1)) {
reduce_iter_values.push_back(Expr(0));
} else {
reduce_iter_values.push_back(axis_vars[i]);
Expand All @@ -156,13 +203,18 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {

int non_zero_axis_size = 0;
for (int i = 0; i < axis.size(); ++i) {
if (FLAGS_cinn_new_group_scheduler && shape[i] == Expr(1)) {
if (!FLAGS_group_schedule_tiling_first &&
FLAGS_cinn_new_group_scheduler && shape[i] == Expr(1)) {
continue;
}
optim::ReplaceVarWithExpr(
&reduce_body, axis[i], reduce_block_vars[non_zero_axis_size]);
++non_zero_axis_size;
}

if (FLAGS_group_schedule_tiling_first) {
non_zero_axis_size = axis.size() - reduce_axis.size();
}
for (int i = non_zero_axis_size; i < reduce_block_vars.size(); ++i) {
optim::ReplaceVarWithExpr(&reduce_body,
reduce_axis[i - non_zero_axis_size],
Expand All @@ -185,7 +237,14 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
// Put the two parts together
ir::Expr body = ir::Block::Make({init_body, reduce_body});
for (int i = static_cast<int>(axis_len) - 1; i >= 0; --i) {
if (!FLAGS_cinn_bucket_compile && shape[i] == Expr(1)) {
if (FLAGS_group_schedule_tiling_first &&
std::find(reduce_axis_position.begin(),
reduce_axis_position.end(),
i) != reduce_axis_position.end()) {
continue;
}
if (!FLAGS_group_schedule_tiling_first && !FLAGS_cinn_bucket_compile &&
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(!FLAGS_group_schedule_tiling_first || !FLAGS_cinn_bucket_compile) && shape[i] == Expr(1)

shape[i] == Expr(1)) {
continue;
}
ir::Var loop_var = axis[i];
Expand All @@ -210,7 +269,7 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
Expr(0), shape[i], cinn::UniqName("i" + std::to_string(i)), false));
optim::ReplaceVarWithExpr(&body, axis[i], block_vars[i]);
axis_vars[i]->is_reduce_axis = false;
if (shape[i] == Expr(1)) {
if (!FLAGS_group_schedule_tiling_first && shape[i] == Expr(1)) {
iter_values.push_back(Expr(0));
} else {
iter_values.push_back(axis_vars[i]);
Expand Down
4 changes: 4 additions & 0 deletions paddle/cinn/runtime/flags.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ PD_DEFINE_bool(cinn_bucket_compile,
BoolFromEnv("FLAGS_cinn_bucket_compile", false),
"Whether to enable bucket compile for dynamic shape.");

PD_DEFINE_bool(group_schedule_tiling_first,
BoolFromEnv("FLAGS_group_schedule_tiling_first", false),
"Whether to enable new group scheduler tiling first strategy.");

PD_DEFINE_bool(cinn_use_common_subexpression_elimination,
BoolFromEnv("FLAGS_cinn_use_common_subexpression_elimination",
false),
Expand Down