2 changes: 2 additions & 0 deletions paddle/fluid/inference/api/analysis_predictor.cc
@@ -113,6 +113,7 @@
#include "paddle/fluid/pir/transforms/fusion/fc_elementwise_layernorm_fuse_pass.h"
#include "paddle/fluid/pir/transforms/fusion/fc_fuse_pass.h"
#include "paddle/fluid/pir/transforms/fusion/matmul_scale_fuse_pass.h"
#include "paddle/fluid/pir/transforms/fusion/multihead_matmul_fuse_pass.h"
#include "paddle/fluid/pir/transforms/identity_op_clean_pass.h"
#include "paddle/fluid/pir/transforms/inplace_pass.h"
#include "paddle/fluid/pir/transforms/params_sync_among_devices_pass.h"
@@ -804,6 +805,7 @@ bool AnalysisPredictor::PrepareExecutor() {
gpu_pm.AddPass(::pir::CreateConv2dBnFusePass());
gpu_pm.AddPass(::pir::CreateConv2dAddActFusePass());
gpu_pm.AddPass(::pir::CreateConv2dAddFusePass());
gpu_pm.AddPass(::pir::CreateMultiHeadMatmulFusePass());
gpu_pm.AddPass(::pir::CreateFcFusePass());
gpu_pm.AddPass(::pir::CreateFcElementwiseLayerNormFusePass());
gpu_pm.AddPass(::pir::CreateMatmulScaleFusePass());
6 changes: 1 addition & 5 deletions paddle/fluid/pir/drr/README.md
@@ -182,14 +182,10 @@ class FusedLinearPattern : public paddle::drr::DrrPatternBase {
// Define ResultPattern
paddle::drr::ResultPattern res = pat.ResultPattern();
// Define Constraint
const auto &act_attr =
res.Attr([](const paddle::drr::MatchContext &match_ctx) -> std::any {
return "none";
});
const auto &fused_gemm_epilogue = res.Op(paddle::dialect::FusedGemmEpilogueOp::name(),
{{{"trans_x", pat.Attr("trans_x")},
{"trans_y", pat.Attr("trans_y")},
{"activation", act_attr}}});
{"activation", res.StrAttr("none")}}});
fused_gemm_epilogue(
{&res.Tensor("x"), &res.Tensor("w"), &res.Tensor("bias")},
{&res.Tensor("out")});
6 changes: 1 addition & 5 deletions paddle/fluid/pir/drr/README_cn.md
@@ -185,14 +185,10 @@ class FusedLinearPattern : public paddle::drr::DrrPatternBase {
// Define Result Pattern
paddle::drr::ResultPattern res = pat.ResultPattern();
// Define Constraint
const auto &act_attr =
res.Attr([](const paddle::drr::MatchContext &match_ctx) -> std::any {
return "none";
});
const auto &fused_gemm_epilogue = res.Op(paddle::dialect::FusedGemmEpilogueOp::name(),
{{{"trans_x", pat.Attr("trans_x")},
{"trans_y", pat.Attr("trans_y")},
{"activation", act_attr}}});
{"activation", res.StrAttr("none")}}});
fused_gemm_epilogue(
{&res.Tensor("x"), &res.Tensor("w"), &res.Tensor("bias")},
{&res.Tensor("out")});
43 changes: 40 additions & 3 deletions paddle/fluid/pir/drr/include/drr_pattern_context.h
@@ -15,6 +15,7 @@
#pragma once

#include <any>
#include <cstdint>
#include <functional>
#include <memory>
#include <string>
@@ -280,10 +281,46 @@ class ResultPattern {
return ctx_->ResultTensorPattern(Tensor::NONE_TENSOR_NAME);
}

Attribute Attr(const std::string& attr_name) const {
return NormalAttribute(attr_name);
Attribute StrAttr(const std::string& value) const {
return ComputeAttr(
[=](const MatchContext& match_ctx) -> std::string { return value; });
}

Attribute BoolAttr(bool value) const {
return ComputeAttr(
[=](const MatchContext& match_ctx) -> bool { return value; });
}

Attribute Int32Attr(int32_t value) const {
return ComputeAttr(
[=](const MatchContext& match_ctx) -> int32_t { return value; });
}

Attribute Int64Attr(int64_t value) const {
return ComputeAttr(
[=](const MatchContext& match_ctx) -> int64_t { return value; });
}
Attribute Attr(const AttrComputeFunc& attr_compute_func) const {

Attribute Float32Attr(float value) const {
return ComputeAttr(
[=](const MatchContext& match_ctx) -> float { return value; });
}

Attribute VectorInt64Attr(const std::vector<int64_t>& value) const {
return ComputeAttr(
[=](const MatchContext& match_ctx) -> std::vector<int64_t> {
return value;
});
}

Attribute VectorInt32Attr(const std::vector<int32_t>& value) const {
return ComputeAttr(
[=](const MatchContext& match_ctx) -> std::vector<int32_t> {
return value;
});
}

Attribute ComputeAttr(const AttrComputeFunc& attr_compute_func) const {
return ComputeAttribute(attr_compute_func);
}

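The new helpers above are thin wrappers around ComputeAttr that return a fixed value regardless of the match context. A minimal sketch of how a result pattern might combine them, mirroring the README example changed in this PR; the op name "pd_op.example" and its attribute keys are illustrative placeholders, not ops from this diff:

// Hedged sketch: constant attributes built with the new typed helpers.
// "pd_op.example" and its attribute keys are hypothetical; `pat` is assumed
// to be the SourcePattern of an enclosing DRR pattern.
paddle::drr::ResultPattern res = pat.ResultPattern();
const auto &example_op =
    res.Op("pd_op.example",
           {{{"activation", res.StrAttr("none")},           // std::string constant
             {"keep_dim", res.BoolAttr(false)},             // bool constant
             {"axis", res.Int32Attr(-1)},                   // int32_t constant
             {"scale", res.Float32Attr(1.0f)},              // float constant
             {"shape", res.VectorInt64Attr({1, -1})}}});    // std::vector<int64_t> constant
example_op({&res.Tensor("x")}, {&res.Tensor("out")});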
1 change: 1 addition & 0 deletions paddle/fluid/pir/drr/ir_operation_factory.cc
@@ -19,6 +19,7 @@
#include "paddle/fluid/pir/dialect/operator/ir/manual_op.h"
#include "paddle/fluid/pir/dialect/operator/ir/pd_op.h"
#include "paddle/fluid/pir/drr/attr_type_uilts.h"
#include "paddle/fluid/pir/drr/include/drr_pattern_context.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/pir/core/builtin_op.h"
#include "paddle/pir/core/operation.h"
141 changes: 106 additions & 35 deletions paddle/fluid/pir/transforms/constant_folding_pass.cc
@@ -28,12 +28,15 @@
#include "paddle/fluid/pir/transforms/pd_op_to_kernel_pass.h"
#include "paddle/fluid/pir/transforms/transform_general_functions.h"

#include "paddle/common/errors.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/enforce.h"

#include "paddle/pir/core/builder.h"
#include "paddle/pir/core/builtin_attribute.h"
#include "paddle/pir/core/builtin_op.h"
#include "paddle/pir/core/builtin_type.h"
#include "paddle/pir/core/ir_context.h"
#include "paddle/pir/core/op_result.h"
#include "paddle/pir/core/op_trait.h"
@@ -83,31 +86,56 @@ class ConstantFoldingPattern : public pir::RewritePattern {
if (!op->operand_source(i) || !op->operand_source(i).type()) {
continue;
}
// 2. inputs must come from parameter op or constant op
// 2. inputs must come from ParameterOp/ConstantTensorOp/CombineOp
auto* prev_op = pir::GetDefiningOpForInput(op, i);
if (!prev_op || !(prev_op->isa<pir::ParameterOp>() ||
prev_op->isa<pir::ConstantTensorOp>())) {
prev_op->isa<pir::ConstantTensorOp>() ||
prev_op->isa<pir::CombineOp>())) {
return false;
}
// 3. inputs must be a dense tensor type
if (!op->operand_source(i)
.type()
.isa<paddle::dialect::DenseTensorType>()) {
return false;
if (prev_op->isa<pir::CombineOp>()) {
if (prev_op->result(0).use_count() > 1) {
return false;
}
for (uint32_t i = 0; i < prev_op->num_operands(); i++) {
if (!prev_op->operand_source(i) ||
!prev_op->operand_source(i).type()) {
continue;
}
// 3. for combine's prev op, inputs must come from
// ParameterOp/ConstantTensorOp
auto* prev_prev_op = pir::GetDefiningOpForInput(prev_op, i);
if (!prev_prev_op || !(prev_prev_op->isa<pir::ParameterOp>() ||
prev_prev_op->isa<pir::ConstantTensorOp>())) {
return false;
}
if (!prev_op->operand_source(i)
.type()
.isa<paddle::dialect::DenseTensorType>()) {
return false;
}
}
} else {
// 4. inputs must be a dense tensor type
if (!op->operand_source(i)
.type()
.isa<paddle::dialect::DenseTensorType>()) {
return false;
}
}
}

for (uint32_t i = 0; i < op->num_results(); i++) {
if (!op->result(i) || !op->result(i).type()) {
continue;
}
// 4. outputs must be a dense tensor type
// 5. outputs must be a dense tensor type
if (!op->result(i).type().isa<paddle::dialect::DenseTensorType>()) {
return false;
}
}

// 5. maybe affect performance
// 6. maybe affect performance
if (op->isa<paddle::dialect::FullOp>()) {
auto next_ops = pir::GetUseOpsForOutput(op, 0);
for (auto [next_op, _] : next_ops) {
@@ -183,7 +211,7 @@ class ConstantFoldingPattern : public pir::RewritePattern {
}

auto constant_op = rewriter.Build<pir::ConstantTensorOp>(
rewriter.tensor_name_attr(output_var_name), op->result(i).type());
output_var_name, op->result(i).type());
constant_op->set_attribute(
kAttrIsPersisable, rewriter.array_attr({rewriter.bool_attr(true)}));

@@ -233,7 +261,7 @@ class ConstantFoldingPattern : public pir::RewritePattern {
BuildProgramFromOperation(op, &new_program, rewriter);

// execute program
for (auto output_var_name : output_var_names) {
for (const auto& output_var_name : output_var_names) {
exe_config_->skip_gc_vars.insert(output_var_name);
}
auto kernel_program =
@@ -245,6 +273,29 @@
return output_var_names;
}

template <typename Op>
Op BuildParameterOrConstantTensorOP(
uint32_t index,
pir::Operation* op,
pir::Builder& builder, // NOLINT
pir::PatternRewriter& rewriter) const { // NOLINT
const auto& var_name =
pir::GetParameterNameFromValue(op->operand_source(index));
auto* var = scope_->FindVar(var_name);
PADDLE_ENFORCE_NOT_NULL(var,
phi::errors::InvalidArgument(
"Persisable var [%s] not in scope.", var_name));
auto from_op =
builder.Build<Op>(var_name, op->operand_source(index).type());
if (op->operand_source(index).use_count() <= 1) {
deleted_vars_->push_back(var_name);
} else {
from_op->set_attribute(kAttrIsPersisable,
rewriter.array_attr({rewriter.bool_attr(true)}));
}
return from_op;
}

std::vector<std::string> BuildProgramFromOperation(
pir::Operation* op,
pir::Program* new_program,
@@ -256,42 +307,62 @@
std::vector<pir::Value> op_inputs;
for (uint32_t i = 0; i < op->num_operands(); i++) {
if (op->operand_source(i)) {
const auto& param_name =
pir::GetParameterNameFromValue(op->operand_source(i));
auto* param_var = scope_->FindVar(param_name);
PADDLE_ENFORCE_NOT_NULL(
param_var,
phi::errors::InvalidArgument("Parameter var [%s] not in scope.",
param_name));

auto parameter_op = builder.Build<pir::ParameterOp>(
param_name, op->operand_source(i).type());
if (op->operand_source(i).use_count() <= 1) {
deleted_vars_->push_back(param_name);
auto* prev_op = pir::GetDefiningOpForInput(op, i);
if (prev_op->isa<pir::CombineOp>()) {
// prepare combine op inputs
std::vector<pir::Value> combine_op_inputs;
for (uint32_t j = 0; j < prev_op->num_operands(); j++) {
auto* prev_prev_op = pir::GetDefiningOpForInput(prev_op, j);
if (prev_prev_op->isa<pir::ParameterOp>()) {
auto parameter_op =
BuildParameterOrConstantTensorOP<pir::ParameterOp>(
j, prev_op, builder, rewriter);
combine_op_inputs.push_back(parameter_op->result(0));
} else if (prev_prev_op->isa<pir::ConstantTensorOp>()) {
auto constant_op =
BuildParameterOrConstantTensorOP<pir::ConstantTensorOp>(
j, prev_op, builder, rewriter);
combine_op_inputs.push_back(constant_op->result(0));
} else {
PADDLE_THROW(phi::errors::Fatal(
"Not support %s before builtin.combine op!",
prev_prev_op->name()));
}
}
auto combine_op = builder.Build<pir::CombineOp>(combine_op_inputs);
op_inputs.push_back(combine_op->result(0));
} else if (prev_op->isa<pir::ParameterOp>()) {
auto parameter_op =
BuildParameterOrConstantTensorOP<pir::ParameterOp>(
i, op, builder, rewriter);
op_inputs.push_back(parameter_op->result(0));
} else if (prev_op->isa<pir::ConstantTensorOp>()) {
auto constant_op =
BuildParameterOrConstantTensorOP<pir::ConstantTensorOp>(
i, op, builder, rewriter);
op_inputs.push_back(constant_op->result(0));
} else {
parameter_op->set_attribute(
kAttrIsPersisable,
rewriter.array_attr({rewriter.bool_attr(true)}));
PADDLE_THROW(phi::errors::Fatal("Not support %s before matched op!",
prev_op->name()));
}
op_inputs.push_back(parameter_op->result(0));
} else {
op_inputs.push_back(
op->operand_source(i).dyn_cast<pir::OpResult>() /*nullptr*/);
}
}

// prepare op outputs
std::vector<pir::Type> output_types;
std::vector<pir::Type> op_output_types;
for (uint32_t i = 0; i < op->num_results(); i++) {
output_types.push_back(op->result(i).type());
op_output_types.push_back(op->result(i).type());
}

auto* temp_op =
builder.Build(op_inputs, op->attributes(), output_types, op->info());
auto* op_copy =
builder.Build(op_inputs, op->attributes(), op_output_types, op->info());

std::vector<std::string> output_var_names;
for (uint32_t i = 0; i < op->num_results(); i++) {
if (!temp_op->result(i) || !temp_op->result(i).type()) {
for (uint32_t i = 0; i < op_copy->num_results(); i++) {
if (!op_copy->result(i) || !op_copy->result(i).type()) {
continue;
}
std::stringstream ss;
Expand All @@ -301,7 +372,7 @@ class ConstantFoldingPattern : public pir::RewritePattern {
std::string output_var_name =
"constant_folding@_" + ss.str() + std::to_string((*suffix_)++);

builder.Build<pir::ShadowOutputOp>(temp_op->result(i), output_var_name);
builder.Build<pir::ShadowOutputOp>(op_copy->result(i), output_var_name);
output_var_names.push_back(output_var_name);
}

@@ -366,7 +437,7 @@ class ConstantFoldingPatternForTrain : public ConstantFoldingPattern {
output_var_name));

auto constant_op = rewriter.Build<pir::ConstantTensorOp>(
rewriter.tensor_name_attr(output_var_name), op->result(i).type());
output_var_name, op->result(i).type());
constant_op->set_attribute(
kAttrIsPersisable, rewriter.array_attr({rewriter.bool_attr(true)}));

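For orientation, a hedged sketch (not part of the diff) of the kind of subgraph the relaxed checks now admit; the textual IR in the comments is only illustrative of the op names used by the pass, not exact pir syntax:

// Hedged sketch, assuming two persistable inputs feeding a tensor-list op:
//   %w0  = builtin.parameter            // or a pir::ConstantTensorOp
//   %w1  = builtin.parameter
//   %vec = builtin.combine(%w0, %w1)    // single use, as the checks require
//   %out = consumer_op(%vec)            // dense-tensor outputs only
//
// BuildProgramFromOperation clones this subgraph into a temporary program
// (rebuilding the combine from ParameterOp/ConstantTensorOp inputs), executes
// it, stores each result under a "constant_folding@_*" variable, and the
// rewrite replaces the consumer's results with pir::ConstantTensorOp ops that
// read those variables.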