diff --git a/paddle/fluid/framework/naive_executor.cc b/paddle/fluid/framework/naive_executor.cc index 5dae6c1c845148..d3b74fb00c1c5e 100644 --- a/paddle/fluid/framework/naive_executor.cc +++ b/paddle/fluid/framework/naive_executor.cc @@ -234,6 +234,20 @@ void NaiveExecutor::RegisterInputHook(const HookFunc &hookfunc) { } } +void NaiveExecutor::RegisterOutputHook(const PirHookFunc &hookfunc) { + pir_output_hookfuncs_.push_back(hookfunc); + if (interpreter_core_) { + interpreter_core_->SetOutputHooks(pir_output_hookfuncs_); + } +} + +void NaiveExecutor::RegisterInputHook(const PirHookFunc &hookfunc) { + pir_input_hookfuncs_.push_back(hookfunc); + if (interpreter_core_) { + interpreter_core_->SetInputHooks(pir_input_hookfuncs_); + } +} + void NaiveExecutor::MakeReusePlan( const std::unordered_map &reuse_table) { std::unordered_map> clusters; diff --git a/paddle/fluid/framework/naive_executor.h b/paddle/fluid/framework/naive_executor.h index d36e3042b0b722..47f58924de1443 100644 --- a/paddle/fluid/framework/naive_executor.h +++ b/paddle/fluid/framework/naive_executor.h @@ -45,6 +45,9 @@ class NaiveExecutor { public: using HookFunc = std::function; + using PirHookFunc = + std::function; + explicit NaiveExecutor(const platform::Place& place) : place_(place) {} ~NaiveExecutor(); @@ -94,6 +97,8 @@ class NaiveExecutor { void RegisterOutputHook(const HookFunc& hookfunc); void RegisterInputHook(const HookFunc& hookfunc); + void RegisterOutputHook(const PirHookFunc& hookfunc); + void RegisterInputHook(const PirHookFunc& hookfunc); private: void CreateOps(const ProgramDesc& desc, int block_id); @@ -107,6 +112,9 @@ class NaiveExecutor { std::vector output_hookfuncs_; std::vector input_hookfuncs_; + std::vector pir_output_hookfuncs_; + std::vector pir_input_hookfuncs_; + // Record information that tensor_a should ShareBufferWith tensor_b. std::unordered_map> reuse_cache_; diff --git a/paddle/fluid/framework/new_executor/instruction/control_flow/if_instruction.cc b/paddle/fluid/framework/new_executor/instruction/control_flow/if_instruction.cc index db8ef9f2de7bff..0730ef34f140bb 100644 --- a/paddle/fluid/framework/new_executor/instruction/control_flow/if_instruction.cc +++ b/paddle/fluid/framework/new_executor/instruction/control_flow/if_instruction.cc @@ -198,6 +198,16 @@ IfInstruction::~IfInstruction() { } } +void IfInstruction::SetOutputHooks(const std::vector& hookfuncs) { + true_branch_inter_->SetOutputHooks(hookfuncs); + false_branch_inter_->SetOutputHooks(hookfuncs); +} + +void IfInstruction::SetInputHooks(const std::vector& hookfuncs) { + true_branch_inter_->SetInputHooks(hookfuncs); + false_branch_inter_->SetInputHooks(hookfuncs); +} + void IfInstruction::Run() { bool cond = true; if (cond_var_->IsType()) { diff --git a/paddle/fluid/framework/new_executor/instruction/control_flow/if_instruction.h b/paddle/fluid/framework/new_executor/instruction/control_flow/if_instruction.h index cf0de0fc3581f4..7667c9128a8a7f 100644 --- a/paddle/fluid/framework/new_executor/instruction/control_flow/if_instruction.h +++ b/paddle/fluid/framework/new_executor/instruction/control_flow/if_instruction.h @@ -48,6 +48,10 @@ class IfInstruction : public InstructionBase { PirInterpreter* FalseBranchInterpreter() const { return false_branch_inter_; } + void SetOutputHooks(const std::vector& hookfuncs); + + void SetInputHooks(const std::vector& hookfuncs); + private: ::pir::Operation* op_; diff --git a/paddle/fluid/framework/new_executor/instruction/control_flow/while_instruction.cc b/paddle/fluid/framework/new_executor/instruction/control_flow/while_instruction.cc index ae8b0d1df2eee5..e4cc8568bbf88c 100644 --- a/paddle/fluid/framework/new_executor/instruction/control_flow/while_instruction.cc +++ b/paddle/fluid/framework/new_executor/instruction/control_flow/while_instruction.cc @@ -240,6 +240,16 @@ void WhileInstruction::ShareDatasToOutputs() { } } +void WhileInstruction::SetOutputHooks( + const std::vector& hookfuncs) { + body_inter_->SetOutputHooks(hookfuncs); +} + +void WhileInstruction::SetInputHooks( + const std::vector& hookfuncs) { + body_inter_->SetInputHooks(hookfuncs); +} + void WhileInstruction::Run() { #ifdef PADDLE_WITH_DNNL // Executor on being destroyed clears oneDNN cache and resets diff --git a/paddle/fluid/framework/new_executor/instruction/control_flow/while_instruction.h b/paddle/fluid/framework/new_executor/instruction/control_flow/while_instruction.h index 849d4ec4d184d1..b6f729a784f5a2 100644 --- a/paddle/fluid/framework/new_executor/instruction/control_flow/while_instruction.h +++ b/paddle/fluid/framework/new_executor/instruction/control_flow/while_instruction.h @@ -50,6 +50,10 @@ class WhileInstruction : public InstructionBase { PirInterpreter* BodyInterpreter() const { return body_inter_.get(); } + void SetOutputHooks(const std::vector& hookfuncs); + + void SetInputHooks(const std::vector& hookfuncs); + private: // 'output' = 'input' void ShareInputsToOutputs(); diff --git a/paddle/fluid/framework/new_executor/interpreter_base_impl.h b/paddle/fluid/framework/new_executor/interpreter_base_impl.h index e99a02f37136ed..1d9bac63d7c152 100644 --- a/paddle/fluid/framework/new_executor/interpreter_base_impl.h +++ b/paddle/fluid/framework/new_executor/interpreter_base_impl.h @@ -104,6 +104,10 @@ class InterpreterBaseImpl { virtual void SetInputHooks(const std::vector& hookfuncs) = 0; + virtual void SetOutputHooks(const std::vector& hookfuncs) = 0; + + virtual void SetInputHooks(const std::vector& hookfuncs) = 0; + virtual std::shared_ptr> GetDependencyCount() const = 0; virtual bool IsSharedResultsBuild() const = 0; diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc index 61151373b2a291..7bf78eed8b04ee 100644 --- a/paddle/fluid/framework/new_executor/interpretercore.cc +++ b/paddle/fluid/framework/new_executor/interpretercore.cc @@ -139,6 +139,15 @@ void InterpreterCore::SetOutputHooks(const std::vector& hookfuncs) { impl_->SetOutputHooks(hookfuncs); } +void InterpreterCore::SetInputHooks(const std::vector& hookfuncs) { + impl_->SetInputHooks(hookfuncs); +} + +void InterpreterCore::SetOutputHooks( + const std::vector& hookfuncs) { + impl_->SetOutputHooks(hookfuncs); +} + void InterpreterCore::Build( const std::vector& feed_names, std::vector* op_func_nodes) { diff --git a/paddle/fluid/framework/new_executor/interpretercore.h b/paddle/fluid/framework/new_executor/interpretercore.h index f2b4426b8ebb2c..39ad549a784557 100644 --- a/paddle/fluid/framework/new_executor/interpretercore.h +++ b/paddle/fluid/framework/new_executor/interpretercore.h @@ -14,6 +14,7 @@ #pragma once #include "paddle/fluid/framework/new_executor/interpreter_base_impl.h" +#include "paddle/fluid/framework/new_executor/new_executor_defs.h" PD_DECLARE_bool(new_executor_use_local_scope); @@ -88,6 +89,10 @@ class InterpreterCore { void SetInputHooks(const std::vector& hookfuncs); + void SetOutputHooks(const std::vector& hookfuncs); + + void SetInputHooks(const std::vector& hookfuncs); + void Build(const std::vector& feed_names, std::vector* op_func_nodes); diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.h b/paddle/fluid/framework/new_executor/new_executor_defs.h index c416b151aef03a..79619828980aa3 100644 --- a/paddle/fluid/framework/new_executor/new_executor_defs.h +++ b/paddle/fluid/framework/new_executor/new_executor_defs.h @@ -40,9 +40,13 @@ COMMON_DECLARE_bool(dynamic_static_unified_comm); namespace paddle { namespace framework { +class InstructionBase; +class ValueExecutionInfo; using OpKernelComputeFunc = std::function; using HookFunc = std::function; +using PirHookFunc = + std::function; using SchedulingPriority = int64_t; diff --git a/paddle/fluid/framework/new_executor/pir_interpreter.cc b/paddle/fluid/framework/new_executor/pir_interpreter.cc index 03439ad6fd417d..c2b234d8d667f2 100644 --- a/paddle/fluid/framework/new_executor/pir_interpreter.cc +++ b/paddle/fluid/framework/new_executor/pir_interpreter.cc @@ -723,8 +723,16 @@ void PirInterpreter::BuildInstruction() { } } else if (op.dialect()->name() == "pd_op") { if (op.isa()) { // NOLINT - vec_instruction_base_.emplace_back(std::make_unique( - op_idx++, place_, &op, value_exe_info_.get(), execution_config_)); + std::unique_ptr if_instr_ptr = + std::make_unique(op_idx++, + place_, + &op, + value_exe_info_.get(), + execution_config_); + if_instr_ptr->SetOutputHooks(pir_output_hookfuncs_); + if_instr_ptr->SetInputHooks(pir_input_hookfuncs_); + vec_instruction_base_.emplace_back(std::move(if_instr_ptr)); + sub_blocks_.insert( {&op.dyn_cast().true_block(), dynamic_cast(vec_instruction_base_.back().get()) @@ -742,8 +750,16 @@ void PirInterpreter::BuildInstruction() { vec_instruction_base_.back().get()) ->ForwardInterpreter()}); } else if (op.isa()) { - vec_instruction_base_.emplace_back(std::make_unique( - op_idx++, place_, &op, value_exe_info_.get(), execution_config_)); + std::unique_ptr while_instr_ptr = + std::make_unique(op_idx++, + place_, + &op, + value_exe_info_.get(), + execution_config_); + while_instr_ptr->SetOutputHooks(pir_output_hookfuncs_); + while_instr_ptr->SetInputHooks(pir_input_hookfuncs_); + vec_instruction_base_.emplace_back(std::move(while_instr_ptr)); + sub_blocks_.insert( {&op.dyn_cast().body(), dynamic_cast(vec_instruction_base_.back().get()) @@ -1764,6 +1780,13 @@ void PirInterpreter::RunInstructionBase(InstructionBase* instr_node) { << " runs on " << platform::GetCurrentThreadName() << "\n" << "Before: " << cur_place << " " << instr_node->DebugStringEx(scope_, value_exe_info_.get()); + + if (execution_config_.used_for_inference) { + for (auto& hook : pir_input_hookfuncs_) { + hook(instr_node, value_exe_info_.get(), scope_); + } + } + if (!instr_node->IsArtificial()) { instr_node->Run(); @@ -1789,6 +1812,13 @@ void PirInterpreter::RunInstructionBase(InstructionBase* instr_node) { VLOG(4) << "done CheckGC"; memory::LogDeviceMemoryStats(cur_place, instr_node->Name()); } + + if (execution_config_.used_for_inference) { + for (auto& hook : pir_output_hookfuncs_) { + hook(instr_node, value_exe_info_.get(), scope_); + } + } + VLOG(5) << "after run kernel"; instr_node->RecordEvent(cur_place); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) diff --git a/paddle/fluid/framework/new_executor/pir_interpreter.h b/paddle/fluid/framework/new_executor/pir_interpreter.h index e28e418b9dd952..9901dcf421cdc2 100644 --- a/paddle/fluid/framework/new_executor/pir_interpreter.h +++ b/paddle/fluid/framework/new_executor/pir_interpreter.h @@ -96,12 +96,16 @@ class PirInterpreter : public InterpreterBaseImpl { const platform::Place& GetPlace() const override { return place_; } - void SetOutputHooks(const std::vector& hookfuncs) override { - output_hookfuncs_ = hookfuncs; + void SetOutputHooks(const std::vector& hookfuncs) override {} + + void SetInputHooks(const std::vector& hookfuncs) override {} + + void SetOutputHooks(const std::vector& hookfuncs) override { + pir_output_hookfuncs_ = hookfuncs; } - void SetInputHooks(const std::vector& hookfuncs) override { - input_hookfuncs_ = hookfuncs; + void SetInputHooks(const std::vector& hookfuncs) override { + pir_input_hookfuncs_ = hookfuncs; } std::string GetNameByValue(::pir::Value value) const; @@ -200,8 +204,8 @@ class PirInterpreter : public InterpreterBaseImpl { int64_t onednn_op_num_{-1}; std::vector trace_execute_order_; - std::vector output_hookfuncs_; - std::vector input_hookfuncs_; + std::vector pir_output_hookfuncs_; + std::vector pir_input_hookfuncs_; /// ======================== /// /// For new ir /// diff --git a/paddle/fluid/framework/new_executor/program_interpreter.h b/paddle/fluid/framework/new_executor/program_interpreter.h index 7e956249e22a38..94a8af8197d117 100644 --- a/paddle/fluid/framework/new_executor/program_interpreter.h +++ b/paddle/fluid/framework/new_executor/program_interpreter.h @@ -101,6 +101,10 @@ class ProgramInterpreter : public InterpreterBaseImpl { input_hookfuncs_ = hookfuncs; } + void SetOutputHooks(const std::vector& hookfuncs) override {} + + void SetInputHooks(const std::vector& hookfuncs) override {} + std::unordered_map>* GetForceEventsToWaitInfo() { return force_events_to_wait_; diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 56686a87fb3382..bee02f54614c81 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -31,6 +31,7 @@ #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/pass.h" #include "paddle/fluid/framework/naive_executor.h" +#include "paddle/fluid/framework/new_executor/pir_adaptor/pir_adaptor_util.h" #include "paddle/fluid/framework/op_proto_maker.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/scope.h" @@ -3096,49 +3097,99 @@ void AnalysisPredictor::SaveOptimModel(const std::string &dir) { exe.Run(save_program, scope(), 0, true, true); } -void AnalysisPredictor::RegisterInputHook(const InputTensorHookFunc &hookfunc) { - std::call_once(register_input_hook_flag_, [this] { - executor_->RegisterInputHook( - [this](framework::OperatorBase *op, framework::Scope *scope) { - for (auto &input : op->Inputs()) { - for (auto &var_name : input.second) { +void AnalysisPredictor::RegisterOutputHook( + const OutputTensorHookFunc &hookfunc) { + if (config_.new_ir_enabled()) { + std::call_once(register_output_hook_flag_, [this] { + executor_->RegisterOutputHook( + [this](framework::InstructionBase *instr, + framework::ValueExecutionInfo *value_exe_info, + framework::Scope *scope) { + for (auto &output : instr->Outputs()) { + auto var_name = value_exe_info->GetVarName(output.first); auto *var = scope->FindVar(var_name); if (!var || !var->IsType()) continue; auto dense_tensor = var->Get(); if (!dense_tensor.initialized()) continue; auto tensor = paddle::Tensor( std::make_shared(dense_tensor), var_name); - for (auto &hookfunc : this->input_hookfuncs_) { - hookfunc(op->Type(), var_name, tensor); + for (auto &hookfunc : this->output_hookfuncs_) { + hookfunc(instr->Name() + ":" + std::to_string(instr->Id()), + var_name, + tensor); } } - } - }); - }); - input_hookfuncs_.push_back(hookfunc); + }); + }); + output_hookfuncs_.push_back(hookfunc); + } else { + std::call_once(register_output_hook_flag_, [this] { + executor_->RegisterOutputHook( + [this](framework::OperatorBase *op, framework::Scope *scope) { + for (auto &output : op->Outputs()) { + for (auto &var_name : output.second) { + auto *var = scope->FindVar(var_name); + if (!var || !var->IsType()) continue; + auto dense_tensor = var->Get(); + if (!dense_tensor.initialized()) continue; + auto tensor = paddle::Tensor( + std::make_shared(dense_tensor), var_name); + for (auto &hookfunc : this->output_hookfuncs_) { + hookfunc(op->Type(), var_name, tensor); + } + } + } + }); + }); + output_hookfuncs_.push_back(hookfunc); + } } -void AnalysisPredictor::RegisterOutputHook( - const OutputTensorHookFunc &hookfunc) { - std::call_once(register_output_hook_flag_, [this] { - executor_->RegisterOutputHook( - [this](framework::OperatorBase *op, framework::Scope *scope) { - for (auto &output : op->Outputs()) { - for (auto &var_name : output.second) { +void AnalysisPredictor::RegisterInputHook(const InputTensorHookFunc &hookfunc) { + if (config_.new_ir_enabled()) { + std::call_once(register_input_hook_flag_, [this] { + executor_->RegisterInputHook( + [this](framework::InstructionBase *instr, + framework::ValueExecutionInfo *value_exe_info, + framework::Scope *scope) { + for (auto &input : instr->Inputs()) { + auto var_name = value_exe_info->GetVarName(input.first); auto *var = scope->FindVar(var_name); if (!var || !var->IsType()) continue; auto dense_tensor = var->Get(); if (!dense_tensor.initialized()) continue; auto tensor = paddle::Tensor( std::make_shared(dense_tensor), var_name); - for (auto &hookfunc : this->output_hookfuncs_) { - hookfunc(op->Type(), var_name, tensor); + for (auto &hookfunc : this->input_hookfuncs_) { + hookfunc(instr->Name() + ":" + std::to_string(instr->Id()), + var_name, + tensor); } } - } - }); - }); - output_hookfuncs_.push_back(hookfunc); + }); + }); + input_hookfuncs_.push_back(hookfunc); + } else { + std::call_once(register_input_hook_flag_, [this] { + executor_->RegisterInputHook( + [this](framework::OperatorBase *op, framework::Scope *scope) { + for (auto &input : op->Inputs()) { + for (auto &var_name : input.second) { + auto *var = scope->FindVar(var_name); + if (!var || !var->IsType()) continue; + auto dense_tensor = var->Get(); + if (!dense_tensor.initialized()) continue; + auto tensor = paddle::Tensor( + std::make_shared(dense_tensor), var_name); + for (auto &hookfunc : this->input_hookfuncs_) { + hookfunc(op->Type(), var_name, tensor); + } + } + } + }); + }); + input_hookfuncs_.push_back(hookfunc); + } } template <> @@ -3439,7 +3490,7 @@ uint64_t Predictor::TryShrinkMemory() { return predictor_->TryShrinkMemory(); } void Predictor::RegisterOutputHook(const OutputTensorHookFunc &hookfunc) { predictor_->RegisterOutputHook(hookfunc); } -void Predictor::RegisterInputHook(const OutputTensorHookFunc &hookfunc) { +void Predictor::RegisterInputHook(const InputTensorHookFunc &hookfunc) { predictor_->RegisterInputHook(hookfunc); } diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc index 2996133948cc6e..457bc649f98d19 100644 --- a/paddle/fluid/pybind/inference_api.cc +++ b/paddle/fluid/pybind/inference_api.cc @@ -1225,8 +1225,8 @@ void BindPaddleInferPredictor(py::module *m) { .def("try_shrink_memory", &paddle_infer::Predictor::TryShrinkMemory) .def("clear_intermediate_tensor", &paddle_infer::Predictor::ClearIntermediateTensor) - .def("register_output_hook", - &paddle_infer::Predictor::RegisterOutputHook); + .def("register_output_hook", &paddle_infer::Predictor::RegisterOutputHook) + .def("register_input_hook", &paddle_infer::Predictor::RegisterInputHook); } void BindZeroCopyTensor(py::module *m) {