Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
38da103
fix trt ci bug temporary. (#17565)
NHZlX May 23, 2019
58f7695
Async exe support communicator (#17386)
jacquesqiao May 23, 2019
c1aae8b
Fix GetExpectedKernelType in Concat op (#17459)
jerrywgz May 23, 2019
92e7d5d
fix distribute doc test=develop (#17318)
jacquesqiao May 23, 2019
cf60e5a
fix API python example (#17226)
heavengate May 23, 2019
d817263
add unittest of dygraph RL models. (#17550)
xyzhou-puck May 23, 2019
0357715
Enable elementwise min operator for ngraph (#17521)
mozga-intel May 23, 2019
c618963
Fix allocator bug (#16712)
sneaxiy May 23, 2019
3ee3611
test=develop, fix test_imperative_resnet failed on CI (#17583)
JiabinYang May 23, 2019
0600b37
[CPU] refine softmax op fwd on CPU (#17522)
tensor-tang May 23, 2019
6e11f97
Add exponential moving average (#17562)
May 23, 2019
ca03f49
fix distributed launch.py (#17571)
May 23, 2019
8818c94
test=develop (#17207)
xyzhou-puck May 24, 2019
7ae461e
[CPU] refine cpu softmax bwd (#17534)
tensor-tang May 24, 2019
7f8bc49
polish_executor_and_add_ctx_cache (#17536)
guru4elephant May 24, 2019
6101fd5
update ngraph to v0.19 test=develop (#17582)
mozga-intel May 24, 2019
cf02cb5
Enable elementwise sub operator for ngraph (#17527)
mozga-intel May 24, 2019
f2694e1
[NGraph] Enable assign operator for a ngraph, test=develop (#17437)
mozga-intel May 24, 2019
cee9dcc
Delete LoDTensorset in API.spec (#17577)
tianshuo78520a May 24, 2019
0d4cbda
[NGraph] Enable elementwise mul operator (#17552)
mozga-intel May 24, 2019
2dc1c6f
Add profiler in tracer (#17076)
May 24, 2019
3db9c8c
refine shape and split test. test=develop (#17545)
heavengate May 24, 2019
bccb0ba
fix quantize_squash_pass segfault when no tensor linked to Bias (#17292)
May 24, 2019
5b2a3c4
Conv concat relu quantization (#17466)
May 24, 2019
2280f18
BuildStrategy api comment (#17348)
fc500110 May 24, 2019
b5f4d5e
Add broadcast operators (#17503)
May 24, 2019
2a7b321
Fix the example code in some Python API. (#17343)
guoshengCS May 24, 2019
e8990e6
Fix trust ratio in lamb (#17614)
May 24, 2019
326bf82
add Run Prepared Ctx (#17616)
guru4elephant May 24, 2019
e53119f
Fix decayed adagrad example (#17390)
phlrain May 24, 2019
cbaf9e5
Fix api example [ lstm, sequence_enumerate, sequence_expand,sequence_…
phlrain May 24, 2019
e9216d0
Enable logical operators for the nGraph Bridge. (#17543)
kbinias May 24, 2019
887a39f
Fix dygraph unique name bug (#17592)
sneaxiy May 24, 2019
6724a65
add __str__ method for tensor and lodtensor to support print test=dev…
wopeizl May 24, 2019
21138eb
[DOC][PYTHON] Fix api comment of paddle.fluid.clip.GradientClipByValu…
ysh329 May 24, 2019
0c39b97
[MKL-DNN] Add Fully Connected Op for inference only(#15226)
Sand3r- May 24, 2019
61221eb
TRT: Support set dynamic range in int8 mode. (#17524)
NHZlX May 25, 2019
2b83d75
Enable elementwise pow operator for ngraph (#17526)
mozga-intel May 25, 2019
febc07f
Add Dockerfile for cuda9 and cuda10 (#17600)
zhhsplendid May 25, 2019
1670db5
Gather Op Index Support int64_t datatype (#17610)
hutuxian May 25, 2019
9322216
Add data distributed_sampler (#17573)
May 25, 2019
9f85afb
test=develop (#17643)
bdzhuxiaoning May 25, 2019
bbd6e43
fix conflicts,test=develop (#17186)
May 26, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# A image for building paddle binaries
# Use cuda devel base image for both cpu and gpu environment
# When you modify it, please be aware of cudnn-runtime version
# and libcudnn.so.x in paddle/scripts/docker/build.sh
FROM nvidia/cuda:8.0-cudnn7-devel-ubuntu16.04
MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>

Expand Down
2 changes: 1 addition & 1 deletion cmake/external/ngraph.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ INCLUDE(GNUInstallDirs)
INCLUDE(ExternalProject)

SET(NGRAPH_PROJECT "extern_ngraph")
SET(NGRAPH_GIT_TAG "127e0dedfaac8c6f2b148cc03bf5f67ac5fbe6fe")
SET(NGRAPH_GIT_TAG "096ad6ef0c04d57db1522940dbdf9a0652768065")
SET(NGRAPH_SOURCES_DIR ${THIRD_PARTY_PATH}/ngraph)
SET(NGRAPH_INSTALL_DIR ${THIRD_PARTY_PATH}/install/ngraph)
SET(NGRAPH_INC_DIR ${NGRAPH_INSTALL_DIR}/include)
Expand Down
2 changes: 1 addition & 1 deletion cmake/generic.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ function(cc_test TARGET_NAME)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cpu_deterministic=true)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_limit_of_tmp_allocation=4294967296) # 4G
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true ${MKL_DEBUG_FLAG})
# No unit test should exceed 10 minutes.
set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 600)
endif()
Expand Down
93 changes: 48 additions & 45 deletions paddle/fluid/API.spec

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions paddle/fluid/framework/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,8 @@ cc_test(cow_ptr_tests SRCS details/cow_ptr_test.cc)

cc_test(tuple_test SRCS tuple_test.cc )

cc_test(inlined_vector_test SRCS inlined_vector_test.cc)

if (NOT WIN32)
cc_test(rw_lock_test SRCS rw_lock_test.cc)
endif (NOT WIN32)
Expand Down
66 changes: 30 additions & 36 deletions paddle/fluid/framework/details/async_ssa_graph_executor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,45 +51,39 @@ void ProcessGraph(std::vector<ir::Graph *> graphs, Scope *scope) {
VLOG(3) << "ProcessGraph";
RpcCtxMap send_varname_to_ctx;
RpcCtxMap recv_varname_to_ctx;
for (auto i = 0; i < graphs.size(); ++i) {
std::vector<ir::Node *> nodes_to_delete;
for (auto &node : graphs[i]->Nodes()) {
VLOG(3) << "node name " << node->Name();
if (node && node->IsOp()) {
if (node->Name() == "send") {
auto send_var_name = node->Op()->Input("X")[0];
auto send_varnames = boost::get<std::vector<std::string>>(
node->Op()->GetNullableAttr("send_varnames"));
auto epmap = boost::get<std::vector<std::string>>(
node->Op()->GetNullableAttr("epmap"));
auto height_section = boost::get<std::vector<int64_t>>(
node->Op()->GetNullableAttr("sections"));
auto trainer_id =
boost::get<int>(node->Op()->GetNullableAttr("trainer_id"));
send_varname_to_ctx[send_var_name] =
operators::distributed::RpcContext(send_var_name, send_varnames,
epmap, height_section,
trainer_id);
VLOG(3) << "find and init an send op: "
<< send_varname_to_ctx[send_var_name];
} else if (node->Name() == "recv") {
auto recv_var_name = node->Op()->Output("Out")[0];
auto recv_varnames = boost::get<std::vector<std::string>>(
node->Op()->GetNullableAttr("recv_varnames"));
auto epmap = boost::get<std::vector<std::string>>(
node->Op()->GetNullableAttr("epmap"));
auto trainer_id =
boost::get<int>(node->Op()->GetNullableAttr("trainer_id"));
recv_varname_to_ctx[recv_var_name] =
operators::distributed::RpcContext(recv_var_name, recv_varnames,
epmap, {}, trainer_id);
nodes_to_delete.push_back(node);
VLOG(3) << "find and remove an recv op: "
<< recv_varname_to_ctx[recv_var_name];
}
for (auto &node : graphs[0]->Nodes()) {
VLOG(3) << "node name " << node->Name();
if (node && node->IsOp()) {
if (node->Name() == "send") {
auto send_var_name = node->Op()->Input("X")[0];
auto send_varnames = boost::get<std::vector<std::string>>(
node->Op()->GetNullableAttr("send_varnames"));
auto epmap = boost::get<std::vector<std::string>>(
node->Op()->GetNullableAttr("epmap"));
auto height_section = boost::get<std::vector<int64_t>>(
node->Op()->GetNullableAttr("sections"));
auto trainer_id =
boost::get<int>(node->Op()->GetNullableAttr("trainer_id"));
send_varname_to_ctx[send_var_name] = operators::distributed::RpcContext(
send_var_name, send_varnames, epmap, height_section, trainer_id);
VLOG(3) << "find and init an send op: "
<< send_varname_to_ctx[send_var_name];
} else if (node->Name() == "recv") {
auto recv_var_name = node->Op()->Output("Out")[0];
auto recv_varnames = boost::get<std::vector<std::string>>(
node->Op()->GetNullableAttr("recv_varnames"));
auto epmap = boost::get<std::vector<std::string>>(
node->Op()->GetNullableAttr("epmap"));
auto trainer_id =
boost::get<int>(node->Op()->GetNullableAttr("trainer_id"));
recv_varname_to_ctx[recv_var_name] = operators::distributed::RpcContext(
recv_var_name, recv_varnames, epmap, {}, trainer_id);
VLOG(3) << "find and remove an recv op: "
<< recv_varname_to_ctx[recv_var_name];
}
}
}

// init communicator here
if (send_varname_to_ctx.size() > 0) {
VLOG(3) << "this is distribute mode, will use communicator";
Expand Down
21 changes: 20 additions & 1 deletion paddle/fluid/framework/executor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,25 @@ static bool has_fetch_operators(
return fetch_count > 0;
}

// Builds a reusable ExecutorPrepareContext for block `block_id` of `program`:
// every op of the block is instantiated once here so that repeated
// RunPreparedContext() calls can skip per-run operator creation.
// `skip_ref_cnt_vars` lists variables that must be excluded from
// reference-count-based garbage collection; `force_disable_gc` disables the
// GC bookkeeping for this context entirely.
std::unique_ptr<ExecutorPrepareContext> Executor::PrepareCtxCache(
    const ProgramDesc& program, int block_id,
    const std::vector<std::string>& skip_ref_cnt_vars, bool force_disable_gc) {
  std::unique_ptr<ExecutorPrepareContext> ctx;
  ctx.reset(new ExecutorPrepareContext(program, block_id));
  // Materialize one operator instance per op desc in the target block.
  auto& block = program.Block(block_id);
  for (auto& op_desc : block.AllOps()) {
    ctx->ops_.push_back(OpRegistry::CreateOp(*op_desc));
  }
#ifdef PADDLE_WITH_NGRAPH
  // When nGraph is enabled, fuse eligible op subsequences into nGraph engine
  // ops BEFORE unused-var analysis, so the analysis sees the final op list.
  if (FLAGS_use_ngraph) {
    paddle::operators::NgraphEngine::FuseNgraphOps(
        ctx->prog_.Block(ctx->block_id_), &ctx->ops_);
  }
#endif
  // Precompute which variables can be GC'd after each op runs.
  ctx->PrepareUnusedVars(skip_ref_cnt_vars, force_disable_gc);
  return ctx;
}

void Executor::Run(const ProgramDesc& program, Scope* scope,
std::map<std::string, const LoDTensor*>* feed_targets,
std::map<std::string, LoDTensor*>* fetch_targets,
Expand Down Expand Up @@ -368,6 +387,7 @@ std::vector<std::shared_ptr<ExecutorPrepareContext>> Executor::Prepare(
void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
bool create_local_scope, bool create_vars,
bool keep_kids) {
platform::RecordBlock b(kProgramId);
PADDLE_ENFORCE_NOT_NULL(scope);
Scope* local_scope = scope;
if (create_vars) {
Expand Down Expand Up @@ -407,7 +427,6 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,

for (auto& op : ctx->ops_) {
op->Run(*local_scope, place_);

if (gc) {
DeleteUnusedTensors(*local_scope, op.get(), ctx->unused_vars_, gc.get());
}
Expand Down
24 changes: 15 additions & 9 deletions paddle/fluid/framework/executor.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,21 @@ class Executor {
const std::string& feed_holder_name = "feed",
const std::string& fetch_holder_name = "fetch");

// This API is very slow.
void RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
std::map<std::string, const LoDTensor*>* feed_targets,
std::map<std::string, LoDTensor*>* fetch_targets,
bool create_local_scope = true,
bool create_vars = true,
const std::string& feed_holder_name = "feed",
const std::string& fetch_holder_name = "fetch");

std::unique_ptr<ExecutorPrepareContext> PrepareCtxCache(
const ProgramDesc& program, int block_id,
const std::vector<std::string>& skip_ref_cnt_vars =
std::vector<std::string>(),
bool force_disable_gc = false);

static std::unique_ptr<ExecutorPrepareContext> Prepare(
const ProgramDesc& program, int block_id,
const std::vector<std::string>& skip_ref_cnt_vars =
Expand All @@ -101,15 +116,6 @@ class Executor {
bool create_local_scope = true,
bool create_vars = true, bool keep_kids = false);

// This API is very slow.
void RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
std::map<std::string, const LoDTensor*>* feed_targets,
std::map<std::string, LoDTensor*>* fetch_targets,
bool create_local_scope = true,
bool create_vars = true,
const std::string& feed_holder_name = "feed",
const std::string& fetch_holder_name = "fetch");

void EnableMKLDNN(const ProgramDesc& program);

void RunFromDataset(const ProgramDesc& main_program, Scope* scope,
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/framework/hogwild_worker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ void HogwildWorker::Initialize(const TrainerDesc& desc) {
fetch_config_ = desc.fetch_config();
param_ = desc.hogwild_param();
skip_ops_.resize(param_.skip_ops_size());
for (size_t i = 0; i < param_.skip_ops_size(); ++i) {
for (int i = 0; i < param_.skip_ops_size(); ++i) {
skip_ops_[i] = param_.skip_ops(i);
}
use_cvm_ = desc.use_cvm();
Expand Down
69 changes: 69 additions & 0 deletions paddle/fluid/framework/inlined_vector.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <cstdint>
#include <vector>
#include "paddle/fluid/platform/enforce.h"

namespace paddle {
namespace framework {

// A small-size-optimized vector: the first N elements live in an inline
// fixed array (no heap allocation); elements beyond N spill into a
// heap-backed std::vector tail. Element access and pop_back are unchecked,
// mirroring std::vector semantics (calling back()/pop_back() on an empty
// container is undefined behavior).
//
// NOTE(review): the original used LIKELY/UNLIKELY branch hints from
// paddle/fluid/platform/enforce.h; they are dropped here as they have no
// behavioral effect and the branches are trivially predictable.
template <typename T, size_t N>
class InlinedVector {
  static_assert(N > 0, "N must be larger than 0");

 public:
  inline InlinedVector() { len_ = 0; }

  // Total number of stored elements (inline head + spilled tail).
  inline size_t size() const { return len_; }

  // Unchecked element access; requires i < size().
  inline T& operator[](size_t i) { return i < N ? head_[i] : tail_[i - N]; }

  inline const T& operator[](size_t i) const {
    return i < N ? head_[i] : tail_[i - N];
  }

  // Appends a copy of `item`. The first N elements fill the inline buffer;
  // later elements are pushed onto the heap tail.
  inline void emplace_back(const T& item) {
    if (len_ < N) {
      head_[len_++] = item;
    } else {
      tail_.emplace_back(item);
      ++len_;
    }
  }

  // Removes the last element; undefined if the container is empty.
  inline void pop_back() {
    if (len_ > N) {
      tail_.pop_back();
    }
    --len_;
  }

  // Last element; undefined if the container is empty.
  inline T& back() {
    if (len_ <= N) {
      return head_[len_ - 1];
    } else {
      return tail_.back();
    }
  }

  // Const overload added for const-correctness: without it a
  // `const InlinedVector` could not read its last element even though
  // const operator[] exists.
  inline const T& back() const {
    return len_ <= N ? head_[len_ - 1] : tail_.back();
  }

 private:
  T head_[N];            // inline storage for the first N elements
  size_t len_;           // total element count (head + tail)
  std::vector<T> tail_;  // overflow storage for elements beyond N
};

} // namespace framework
} // namespace paddle
82 changes: 82 additions & 0 deletions paddle/fluid/framework/inlined_vector_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/inlined_vector.h"
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <vector>
#include "gtest/gtest.h"

namespace paddle {
namespace framework {

// Copies every element of an InlinedVector into a plain std::vector so the
// two containers can be compared with std::vector's operator==.
template <typename T, size_t N>
static std::vector<T> ToStdVector(const framework::InlinedVector<T, N> &vec) {
  std::vector<T> result;
  const size_t count = vec.size();
  result.reserve(count);
  for (size_t idx = 0; idx != count; ++idx) {
    result.push_back(vec[idx]);
  }
  return result;
}

template <size_t N>
void InlinedVectorCheck(size_t n) {
std::srand(std::time(nullptr));

std::vector<int> std_vec;
framework::InlinedVector<int, N> vec;

for (size_t i = 0; i < n; ++i) {
int value = rand(); // NOLINT

std_vec.emplace_back(value);
vec.emplace_back(value);

CHECK_EQ(std_vec.size(), vec.size());
CHECK_EQ(std_vec.back(), vec.back());

CHECK_EQ(vec.back(), value);
}

bool is_equal = (std_vec == ToStdVector(vec));

CHECK_EQ(is_equal, true);

for (size_t i = 0; i < n; ++i) {
CHECK_EQ(std_vec.size(), vec.size());
CHECK_EQ(std_vec.back(), vec.back());
std_vec.pop_back();
vec.pop_back();
CHECK_EQ(std_vec.size(), vec.size());
}

CHECK_EQ(std_vec.size(), static_cast<size_t>(0));
CHECK_EQ(vec.size(), static_cast<size_t>(0));
}

TEST(inlined_vector, inlined_vector) {
  // Exercise element counts 0..19 against inline capacities that are
  // smaller than, equal to, and larger than those counts, so both the
  // pure-inline and the spill-to-tail code paths are covered.
  size_t count = 0;
  while (count < 20) {
    InlinedVectorCheck<1>(count);
    InlinedVectorCheck<10>(count);
    InlinedVectorCheck<15>(count);
    InlinedVectorCheck<20>(count);
    InlinedVectorCheck<21>(count);
    InlinedVectorCheck<25>(count);
    ++count;
  }
}

} // namespace framework
} // namespace paddle
4 changes: 4 additions & 0 deletions paddle/fluid/framework/ir/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ pass_library(runtime_context_cache_pass base)
pass_library(quant_conv2d_dequant_fuse_pass inference)
pass_library(fillconstant_elementwisemul_fuse inference)
pass_library(shuffle_channel_detect_pass inference)
pass_library(delete_quant_dequant_op_pass inference)

if(ANAKIN_FOUND)
pass_library(simplify_anakin_priorbox_detection_out_pass inference)
Expand All @@ -86,7 +87,9 @@ if(WITH_MKLDNN)
pass_library(conv_bias_mkldnn_fuse_pass inference mkldnn)
pass_library(conv_relu_mkldnn_fuse_pass inference mkldnn)
pass_library(conv_brelu_mkldnn_fuse_pass inference mkldnn)
pass_library(conv_concat_relu_mkldnn_fuse_pass inference mkldnn)
pass_library(conv_elementwise_add_mkldnn_fuse_pass inference mkldnn)
pass_library(fc_mkldnn_pass inference mkldnn)
pass_library(cpu_quantize_placement_pass base mkldnn)
pass_library(cpu_quantize_pass inference mkldnn)
pass_library(cpu_quantize_squash_pass inference mkldnn)
Expand Down Expand Up @@ -116,6 +119,7 @@ if (WITH_MKLDNN)
cc_test(test_conv_bias_mkldnn_fuse_pass SRCS mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc DEPS conv_bias_mkldnn_fuse_pass naive_executor)
cc_test(test_conv_relu_mkldnn_fuse_pass SRCS mkldnn/conv_relu_mkldnn_fuse_pass_tester.cc DEPS conv_relu_mkldnn_fuse_pass)
cc_test(test_conv_brelu_mkldnn_fuse_pass SRCS mkldnn/conv_brelu_mkldnn_fuse_pass_tester.cc DEPS conv_brelu_mkldnn_fuse_pass)
cc_test(test_conv_concat_relu_mkldnn_fuse_pass SRCS mkldnn/conv_concat_relu_mkldnn_fuse_pass_tester.cc DEPS conv_concat_relu_mkldnn_fuse_pass)
cc_test(test_conv_elementwise_add_mkldnn_fuse_pass SRCS mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc DEPS conv_elementwise_add_mkldnn_fuse_pass)
cc_test(test_mkldnn_placement_pass SRCS mkldnn/mkldnn_placement_pass_tester.cc DEPS mkldnn_placement_pass)
cc_test(test_cpu_quantize_placement_pass SRCS mkldnn/cpu_quantize_placement_pass_tester.cc DEPS cpu_quantize_placement_pass)
Expand Down
Loading