Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions paddle/framework/data_transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ limitations under the License. */
namespace paddle {
namespace framework {

using DataTransformFN =
using DataTransformFn =
std::function<void(const std::vector<platform::DeviceContext*> ctx,
const Variable& in, Variable* out)>;
using KernelTypePair = std::pair<OpKernelType, OpKernelType>;
Expand All @@ -47,7 +47,7 @@ struct KernelTypePairHash {
};

using DataTransformMap =
std::unordered_map<KernelTypePair, DataTransformFN, KernelTypePairHash>;
std::unordered_map<KernelTypePair, DataTransformFn, KernelTypePairHash>;

class DataTransformFnMap {
public:
Expand All @@ -58,25 +58,25 @@ class DataTransformFnMap {
}

void Insert(const OpKernelType& left, const OpKernelType& right,
const DataTransformFN& data_tranform_fn) {
const DataTransformFn& data_tranform_fn) {
Insert(std::make_pair(left, right), data_tranform_fn);
}

void Insert(const KernelTypePair& kernel_type_pair,
const DataTransformFN& data_tranform_fn) {
const DataTransformFn& data_tranform_fn) {
PADDLE_ENFORCE(!Has(kernel_type_pair),
"KernelTypePair %s has been registered", "");
map_.insert({kernel_type_pair, data_tranform_fn});
}

const DataTransformFN& Get(const KernelTypePair& key_pair) const {
const DataTransformFn& Get(const KernelTypePair& key_pair) const {
auto data_transformer = GetNullable(key_pair);
PADDLE_ENFORCE_NOT_NULL(data_transformer,
"DataTransformFN should not be NULL");
"DataTransformFn should not be NULL");
return *data_transformer;
}

const DataTransformFN* GetNullable(const KernelTypePair& key_pair) const {
const DataTransformFn* GetNullable(const KernelTypePair& key_pair) const {
auto it = map_.find(key_pair);
if (it == map_.end()) {
return nullptr;
Expand Down
8 changes: 8 additions & 0 deletions paddle/framework/op_kernel_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ struct OpKernelType {
data_type_ == o.data_type_ && data_layout_ == o.data_layout_ &&
library_type_ == o.library_type_;
}

// Inequality is defined purely as the logical negation of operator== on the
// same pair of kernel types, so the two operators can never disagree.
bool operator!=(const OpKernelType& o) const {
  const bool is_equal = (*this == o);
  return !is_equal;
}
};

inline std::ostream& operator<<(std::ostream& os,
Expand All @@ -78,5 +80,11 @@ inline std::ostream& operator<<(std::ostream& os,
return os;
}

// Returns the textual form of `kernel_key`, produced by streaming it into an
// in-memory string stream and returning the accumulated characters.
inline std::string KernelTypeToString(const OpKernelType& kernel_key) {
  std::ostringstream buffer;
  buffer << kernel_key;
  std::string text = buffer.str();
  return text;
}

} // namespace framework
} // namespace paddle
4 changes: 1 addition & 3 deletions paddle/framework/op_kernel_type_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,8 @@ TEST(OpKernelType, ToString) {
OpKernelType op_kernel_type(DataType::FP32, CPUPlace(), DataLayout::kNCHW,
LibraryType::kCUDNN);

std::ostringstream stream;
stream << op_kernel_type;
ASSERT_EQ(
stream.str(),
paddle::framework::KernelTypeToString(op_kernel_type),
"data_type[5]:data_layout[NCHW]:place[CPUPlace]:library_type[CUDNN]");
}

Expand Down
64 changes: 39 additions & 25 deletions paddle/framework/operator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -413,37 +413,51 @@ void OperatorWithKernel::Run(const Scope& scope,
}

if (actual_kernel_key == expected_kernel_key) {
kernel_iter->second->Compute(ctx);
PADDLE_ENFORCE_EQ(actual_kernel_key.place_, expected_kernel_key.place_,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

actual_kernel_key == expected_kernel_key

是不是默认包含了

actual_kernel_key.place_ == expected_kernel_key.place_

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

不是的,kernel key重载的相等操作符是判断是同一种place,并不能分辨CUDAPlace0 和 CUDAPlace1

"Currently, model parallelism is only supported between "
"CPU and other devices. For example, multi-GPU model "
"parallelism will failed.");
} else {
Scope& op_scope = scope.NewScope();
auto input_vars = this->InputVars();
for (auto var_name : input_vars) {
op_scope.Var(var_name);
}

// TODO(qijun) get appropriate DeviceContext from DeviceContext pool
platform::DeviceContext* trans_dev_ctx = nullptr;
std::vector<platform::DeviceContext*> trans_dev_ctx_vec{trans_dev_ctx};
const DataTransformFn* trans_fun =
DataTransformFnMap::Instance().GetNullable(
std::make_pair(actual_kernel_key, expected_kernel_key));
if (trans_fun) {
auto input_vars = this->InputVars();
// TODO(qijun) filter the input vars that do not need to be transformed

// filter out vars that have already been transformed
std::vector<std::string> need_trans;
for (auto var_name : input_vars) {
auto var_name_trans =
var_name + framework::KernelTypeToString(expected_kernel_key);
if (!scope.FindVar(var_name_trans)) {
const_cast<Scope&>(scope).Var(var_name_trans);
need_trans.push_back(var_name);
}
}

// TODO(qijun) get appropriate DataTransformFN from global map
framework::DataTransformFN trans_fun = nullptr;
if (!need_trans.empty()) {
// TODO(qijun) get appropriate DeviceContext from DeviceContext pool
platform::DeviceContext* trans_dev_ctx = nullptr;
std::vector<platform::DeviceContext*> trans_dev_ctx_vec{trans_dev_ctx};

// Wait for transform starting
dev_ctx->Wait();
// Wait for transform starting
dev_ctx->Wait();

for (auto var_name : input_vars) {
trans_fun(trans_dev_ctx_vec, *(scope.FindVar(var_name)),
op_scope.FindVar(var_name));
}
// Wait for data transform finishing
for (auto ctx : trans_dev_ctx_vec) {
ctx->Wait();
for (auto var_name : need_trans) {
(*trans_fun)(trans_dev_ctx_vec, *(scope.FindVar(var_name)),
scope.FindVar(var_name + framework::KernelTypeToString(
expected_kernel_key)));
}
// Wait for data transform finishing
for (auto ctx : trans_dev_ctx_vec) {
ctx->Wait();
}
}
}

// Create a new ExecutionContext
ExecutionContext op_ctx(*this, op_scope, *dev_ctx);
kernel_iter->second->Compute(op_ctx);
}

kernel_iter->second->Compute(ctx);
}

OpKernelType OperatorWithKernel::GetActualKernelType(
Expand Down