Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
d0ce6a9
fix anakin converter registry (#15993)
Superjomn Mar 1, 2019
be523ba
Add anakin conv2d/relu/sigmoid/tanh converter (#15997)
fc500110 Mar 1, 2019
084310f
paddle-anakin: concat, split, pool2d converter#16003
NHZlX Mar 20, 2019
b21770a
cherry-pick from feature/anakin-engine: Add subgraph fuse support and…
NHZlX Mar 20, 2019
0945b97
cherry-pick feature/anakin-engine: add anakin softmax/transpose/batch…
fc500110 Mar 6, 2019
a32d420
cherry-pick from feature/anakin-engine: batch norm (#16110)
fc500110 Mar 7, 2019
a1d200a
cherry-pick from feature/anakin-engine: Anakin support facebox #16111
NHZlX Mar 20, 2019
69d37f8
cherry-pick from feature/anakin-engine: refine anakin subgraph. #16157
NHZlX Mar 20, 2019
c79f06d
cherry-pick from feature/anakin-engine: add batch interface for pd-an…
NHZlX Mar 20, 2019
a25331b
cherry-pick from feature/anakin-engine: deal the changing shape when …
NHZlX Mar 20, 2019
c407dfa
cherry-pick from feature/anakin-engine: refine paddle-anakin to new i…
NHZlX Mar 20, 2019
07dcf28
git cherry-pick from feature/anakin-engine: update anakin subgraph #1…
NHZlX Mar 20, 2019
4f4daa4
cherry-pick from feature/anakin-engine: add data type for zero copy #…
NHZlX Mar 20, 2019
f3a2e4b
1. Add ANAKIN_ROOT compile option
NHZlX Mar 22, 2019
3df7b98
Merge branch 'develop' of https://github.com/paddlepaddle/paddle into…
NHZlX Mar 22, 2019
a1d11bb
fix ci bug: cudnn handler in multi card
NHZlX Mar 25, 2019
45b3766
fix comments
NHZlX Mar 26, 2019
953bdde
Merge branch 'develop' of https://github.com/paddlepaddle/paddle into…
NHZlX Mar 26, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ option(WITH_DISTRIBUTE "Compile with distributed support" OFF)
option(WITH_PSLIB "Compile with pslib support" OFF)
option(WITH_CONTRIB "Compile the third-party contributation" OFF)
option(REPLACE_ENFORCE_GLOG "Replace PADDLE_ENFORCE with glog/CHECK for better debug." OFF)
# TODO(Superjomn) Remove WITH_ANAKIN option if not needed latter.
option(WITH_ANAKIN "Compile with Anakin library" OFF)
option(ANAKIN_BUILD_FAT_BIN "Build anakin cuda fat-bin lib for all device plantform, ignored when WITH_ANAKIN=OFF" OFF)
option(ANAKIN_BUILD_CROSS_PLANTFORM "Build anakin lib for any nvidia device plantform. ignored when WITH_ANAKIN=OFF" ON)
Expand Down Expand Up @@ -190,6 +191,7 @@ include(configure) # add paddle env configuration
if(WITH_GPU)
include(cuda)
include(tensorrt)
include(anakin_subgraph)
endif()
if(WITH_MKL OR WITH_MKLML)
include(external/anakin)
Expand Down
32 changes: 32 additions & 0 deletions cmake/anakin_subgraph.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
if(NOT WITH_GPU)
return()
endif()

set(ANAKIN_ROOT "/usr" CACHE PATH "ANAKIN ROOT")
find_path(ANAKIN_INCLUDE_DIR anakin_config.h
PATHS ${ANAKIN_ROOT} ${ANAKIN_ROOT}/include
$ENV{ANAKIN_ROOT} $ENV{ANAKIN_ROOT}/include
NO_DEFAULT_PATH
)

find_library(ANAKIN_LIBRARY NAMES libanakin_saber_common.so libanakin.so
PATHS ${ANAKIN_ROOT}
$ENV{ANAKIN_ROOT} $ENV{ANAKIN_ROOT}/lib
NO_DEFAULT_PATH
DOC "Path to ANAKIN library.")

if(ANAKIN_INCLUDE_DIR AND ANAKIN_LIBRARY)
if(WITH_DSO)
set(ANAKIN_FOUND ON)
endif(WITH_DSO)
else()
set(ANAKIN_FOUND OFF)
endif()

if(ANAKIN_FOUND)
message(STATUS "Current ANAKIN header is ${ANAKIN_INCLUDE_DIR}/anakin_config.h. ")
include_directories(${ANAKIN_ROOT}/include)
include_directories(${ANAKIN_ROOT}/include/saber)
link_directories(${ANAKIN_ROOT})
add_definitions(-DPADDLE_WITH_ANAKIN)
endif()
1 change: 1 addition & 0 deletions cmake/tensorrt.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,6 @@ if(TENSORRT_FOUND)
message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. "
"Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ")
include_directories(${TENSORRT_INCLUDE_DIR})
link_directories(${TENSORRT_LIBRARY})
add_definitions(-DPADDLE_WITH_TENSORRT)
endif()
8 changes: 7 additions & 1 deletion paddle/fluid/framework/ir/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,16 +68,22 @@ pass_library(transpose_flatten_concat_fuse_pass inference)
pass_library(identity_scale_op_clean_pass base)
pass_library(sync_batch_norm_pass base)
pass_library(runtime_context_cache_pass base)
pass_library(simplify_anakin_detection_pattern_pass inference)
pass_library(anakin_fillconstant_elementwisemul_fuse inference)

# There may be many transpose-flatten structures in a model, and the output of
# these structures will be used as inputs to the concat Op. This pattern will
# be detected by our pass. The index here represents the number of structures in the
# pattern. We use index 3 ~ 6, because these quantities of structures are
# common in the models.
foreach (index RANGE 3 6)
foreach (index RANGE 2 6)
file(APPEND ${pass_file} "USE_PASS(transpose_flatten${index}_concat_fuse_pass);\n")
endforeach()

foreach (index RANGE 2 6)
file(APPEND ${pass_file} "USE_PASS(simplify_anakin_detection_pattern_pass${index});\n")
endforeach()

if(WITH_MKLDNN)
pass_library(mkldnn_placement_pass base mkldnn)
pass_library(depthwise_conv_mkldnn_pass base mkldnn)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <memory>
#include <string>

#include "paddle/fluid/framework/ir/anakin_fillconstant_elementwisemul_fuse.h"
#include "paddle/fluid/framework/ir/graph_viz_pass.h"

namespace paddle {
namespace framework {
namespace ir {

#define GET_IR_NODE(node__) GET_IR_NODE_FROM_SUBGRAPH(node__, node__, pattern);
#define GET_NODES \
GET_IR_NODE(fill_constant); \
GET_IR_NODE(fill_constant_out); \
GET_IR_NODE(elementwise_mul); \
GET_IR_NODE(elementwise_mul_out);

std::unique_ptr<ir::Graph> AnakinFillconstantElementwisemulFuse::ApplyImpl(
std::unique_ptr<ir::Graph> graph) const {
const std::string pattern_name = "anakin_fillconstant_elementwisemul_fuse";
FusePassBase::Init(pattern_name, graph.get());

GraphPatternDetector gpd;
auto* x = gpd.mutable_pattern()
->NewNode("x")
->assert_is_op_input("elementwise_mul", "X")
->AsInput();

patterns::AnakinFillConstantElementWiseMulFuse pattern(gpd.mutable_pattern(),
pattern_name);
pattern(x);

auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
GET_NODES;

PADDLE_ENFORCE(subgraph.count(x));
auto* elementwise_in = subgraph.at(x);
float constant_value =
boost::get<float>(fill_constant->Op()->GetAttr("value"));

framework::OpDesc new_op_desc;
new_op_desc.SetType("scale");
new_op_desc.SetInput("X", {elementwise_in->Name()});
new_op_desc.SetAttr("scale", constant_value);
new_op_desc.SetAttr("bias", static_cast<float>(0.0));
new_op_desc.SetAttr("bias_after_scale", true);
new_op_desc.SetOutput("Out", {elementwise_mul_out->Name()});
new_op_desc.Flush();

// Create a new node for the fused op.
auto* scale_op = graph->CreateOpNode(&new_op_desc);

IR_NODE_LINK_TO(elementwise_in, scale_op); // Input
IR_NODE_LINK_TO(scale_op, elementwise_mul_out); // Output

// Delete the unneeded nodes.
GraphSafeRemoveNodes(graph.get(),
{fill_constant, fill_constant_out, elementwise_mul});
};

gpd(graph.get(), handler);
return graph;
}

} // namespace ir
} // namespace framework
} // namespace paddle

REGISTER_PASS(anakin_fillconstant_elementwisemul_fuse,
paddle::framework::ir::AnakinFillconstantElementwisemulFuse);
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <memory>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"

namespace paddle {
namespace framework {
namespace ir {

class AnakinFillconstantElementwisemulFuse : public FusePassBase {
public:
virtual ~AnakinFillconstantElementwisemulFuse() {}

protected:
std::unique_ptr<ir::Graph> ApplyImpl(
std::unique_ptr<ir::Graph> graph) const override;
};

} // namespace ir
} // namespace framework
} // namespace paddle
165 changes: 165 additions & 0 deletions paddle/fluid/framework/ir/graph_pattern_detector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1470,6 +1470,171 @@ PDNode *patterns::TransposeFlattenConcat::operator()(
return concat_out;
}

PDNode *patterns::AnakinDetectionPattern::operator()(
std::vector<PDNode *> conv_in, int times) {
// The times represents the repeat times of the
// {prior_box, prior_box_loc_out, flatten, prior_box_var_out, reshape}
const int kNumFields = 7;
const int kPriorBoxLocOffset = 1;
const int kReshape1Offset = 2;
const int kReshape1OutOffset = 3;
const int kPriorBoxVarOffset = 4;
const int kReshape2Offset = 5;
const int kReshape2OutOffset = 6;

const int kBoxCoderThirdInputOffset = times;
const int kMultiClassSecondInputNmsOffset = times + 1;

std::vector<PDNode *> nodes;

for (int i = 0; i < times; i++) {
nodes.push_back(
pattern->NewNode(GetNodeName("prior_box" + std::to_string(i)))
->assert_is_op("density_prior_box"));
nodes.push_back(pattern->NewNode(GetNodeName("box_out" + std::to_string(i)))
->assert_is_op_output("density_prior_box", "Boxes")
->assert_is_op_input("reshape2", "X")
->AsIntermediate());
nodes.push_back(
pattern->NewNode(GetNodeName("reshape1" + std::to_string(i)))
->assert_is_op("reshape2"));

nodes.push_back(
pattern->NewNode(GetNodeName("reshape1_out" + std::to_string(i)))
->assert_is_op_output("reshape2")
->assert_is_op_nth_input("concat", "X", i)
->AsIntermediate());

nodes.push_back(
pattern->NewNode(GetNodeName("box_var_out" + std::to_string(i)))
->assert_is_op_output("density_prior_box", "Variances")
->assert_is_op_input("reshape2", "X")
->AsIntermediate());
nodes.push_back(
pattern->NewNode(GetNodeName("reshape2" + std::to_string(i)))
->assert_is_op("reshape2"));

nodes.push_back(
pattern->NewNode(GetNodeName("reshape2_out" + std::to_string(i)))
->assert_is_op_output("reshape2")
->assert_is_op_nth_input("concat", "X", i)
->AsIntermediate());
}

auto concat_op1 = pattern->NewNode(GetNodeName("concat1"))
->assert_is_op("concat")
->assert_op_has_n_inputs("concat", times);
auto concat_out1 = pattern->NewNode(GetNodeName("concat1_out"))
->assert_is_op_output("concat")
->AsIntermediate();

auto concat_op2 = pattern->NewNode(GetNodeName("concat2"))
->assert_is_op("concat")
->assert_op_has_n_inputs("concat", times);
auto concat_out2 = pattern->NewNode(GetNodeName("concat2_out"))
->assert_is_op_output("concat")
->AsIntermediate();

auto box_coder_op = pattern->NewNode(GetNodeName("box_coder"))
->assert_is_op("box_coder")
->assert_op_has_n_inputs("box_coder", 3);

auto box_coder_out = pattern->NewNode(GetNodeName("box_coder_out"))
->assert_is_op_output("box_coder")
->AsIntermediate();

auto transpose_before_nms =
pattern->NewNode(GetNodeName("transpose_before_nms"))
->assert_is_op("transpose2");

auto transpose_before_nms_out =
pattern->NewNode(GetNodeName("transpose_before_nms_out"))
->assert_is_op_output("transpose2")
->assert_is_op_input("multiclass_nms", "Scores")
->AsIntermediate();

auto multiclass_nms_op = pattern->NewNode(GetNodeName("multiclass_nms"))
->assert_is_op("multiclass_nms")
->assert_op_has_n_inputs("multiclass_nms", 2);

auto multiclass_nms_out = pattern->NewNode(GetNodeName("multiclass_nms_out"))
->assert_is_op_output("multiclass_nms")
->AsOutput();

std::vector<PDNode *> reshape1_outs;
std::vector<PDNode *> reshape2_outs;

for (int i = 0; i < times; i++) {
conv_in[i]->AsInput();
// prior_box
nodes[i * kNumFields]->LinksFrom({conv_in[i]});
// prior_box box out
nodes[i * kNumFields + kPriorBoxLocOffset]->LinksFrom(
{nodes[i * kNumFields]});
// reshape
nodes[i * kNumFields + kReshape1Offset]->LinksFrom(
{nodes[i * kNumFields + kPriorBoxLocOffset]});
// reshape_out
nodes[i * kNumFields + kReshape1OutOffset]->LinksFrom(
{nodes[i * kNumFields + kReshape1Offset]});

nodes[i * kNumFields + kPriorBoxVarOffset]->LinksFrom(
{nodes[i * kNumFields]});
// reshape
nodes[i * kNumFields + kReshape2Offset]->LinksFrom(
{nodes[i * kNumFields + kPriorBoxVarOffset]});
// reshape_out
nodes[i * kNumFields + kReshape2OutOffset]->LinksFrom(
{nodes[i * kNumFields + kReshape2Offset]});

reshape1_outs.push_back(nodes[i * kNumFields + kReshape1OutOffset]);
reshape2_outs.push_back(nodes[i * kNumFields + kReshape2OutOffset]);
}

concat_op1->LinksFrom(reshape1_outs);
concat_op2->LinksFrom(reshape2_outs);
concat_out1->LinksFrom({concat_op1});
concat_out2->LinksFrom({concat_op2});

conv_in[kBoxCoderThirdInputOffset]->AsInput();
conv_in[kMultiClassSecondInputNmsOffset]->AsInput();

box_coder_op->LinksFrom(
{concat_out1, concat_out2, conv_in[kBoxCoderThirdInputOffset]});
box_coder_out->LinksFrom({box_coder_op});

transpose_before_nms->LinksFrom({conv_in[kMultiClassSecondInputNmsOffset]});
transpose_before_nms_out->LinksFrom({transpose_before_nms});

multiclass_nms_op->LinksFrom({box_coder_out, transpose_before_nms_out})
.LinksTo({multiclass_nms_out});

return multiclass_nms_out;
}

PDNode *patterns::AnakinFillConstantElementWiseMulFuse::operator()(
PDNode *elementwise_op_input) {
auto fill_constant =
pattern->NewNode(fill_constant_repr())->assert_is_op("fill_constant");

auto fill_constant_out = pattern->NewNode(fill_constant_out_repr())
->assert_is_op_output("fill_constant")
->assert_is_op_input("elementwise_mul", "Y")
->AsIntermediate();

auto elementwise_mul_op =
pattern->NewNode(elementwise_mul_repr())->assert_is_op("elementwise_mul");

auto elementwise_mul_out = pattern->NewNode(elementwise_mul_out_repr())
->assert_is_op_output("elementwise_mul")
->AsOutput();

fill_constant_out->LinksFrom({fill_constant});
elementwise_mul_op->LinksFrom({elementwise_op_input, fill_constant_out});
elementwise_mul_out->LinksFrom({elementwise_mul_op});
return elementwise_mul_out;
}

} // namespace ir
} // namespace framework
} // namespace paddle
Loading