Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
69 commits
Select commit Hold shift + click to select a range
3acb363
1. move the API check into CPU process (#17110)
Apr 29, 2019
554d3a7
test=develop fix bug: fix selected_indices in nms (#17140)
lijiancheng0614 Apr 29, 2019
deb510d
cvm op feature (#17081)
seiriosPlus Apr 29, 2019
626922d
fix run_time_error in uniform_random. test=develop (#17152)
fc-liu Apr 29, 2019
bf4b21f
fix assertion failure issue when test_analyzer_bert uses ngraph (#17148)
LeoZhao-Intel Apr 29, 2019
bc48453
polish the label_smooth (#17138)
tink2123 Apr 30, 2019
e4a52e0
resolve #17159 (#17172)
guomingz Apr 30, 2019
aa5307c
fix python3 run_time_error in ops. test=develop (#17170)
fc-liu Apr 30, 2019
08773b6
fix reader default stream,test=develop (#17106)
sneaxiy Apr 30, 2019
4e1bc6e
Rewrite inplace pass and fix gc bug (#17126)
sneaxiy Apr 30, 2019
79ed1c7
fix bn fuse vardesc and add model saver (#17143)
tensor-tang Apr 30, 2019
e4a5332
Fix a typo in gpu_info.cc (#17175)
zhhsplendid Apr 30, 2019
5dfe2ab
Fix mem leak when converting Tensor to numpy array (#17182)
sneaxiy Apr 30, 2019
f938cce
remove async executor python api to fix document (#17174)
guru4elephant May 1, 2019
3c6ab79
Remove unnecessary set_devices (#17158)
May 5, 2019
8092c40
Modify test timeout (#17181)
tianshuo78520a May 5, 2019
83c4f77
use two GPUs to run the exclusive test test=develop (#17187)
wopeizl May 5, 2019
a72907b
Enhance concat op to support empty input. (#17015)
jerrywgz May 5, 2019
950aec5
It doesn't need sync when fetch_list nit not empty (#17201)
May 5, 2019
f2db475
update ParallelExecutor (#17204)
May 6, 2019
ee2028a
Add use_cuda to inplace pass (#17205)
sneaxiy May 6, 2019
c5eeecc
Fix tensor_py.h (#17195)
sneaxiy May 6, 2019
06325c4
disable the test_distillation_strategy temporarily (#17227)
May 6, 2019
9ec4615
fix profiler and name_scope API examples (#17212)
luotao1 May 6, 2019
cc95a75
fix distribute fpn proposals, test=develop (#16152)
jerrywgz May 6, 2019
5817077
Fix unexecutable API examples (#17218)
shippingwang May 6, 2019
6b0f27e
Fix some APIs' example (#17214)
May 7, 2019
54636a1
call SetNumThreads everytime to avoid missing omp thread setting (#17…
LeoZhao-Intel May 7, 2019
ef66bae
Refine api doc (#17230)
jerrywgz May 7, 2019
16922e0
fix api_example of tree_conv (#17239)
luotao1 May 7, 2019
a72dbe9
Cherry-pick benchmark related changes from release/1.4 (#17156)
Shixiaowei02 May 7, 2019
32b62c2
optimize sum op (#16820)
zhaoyuchen2018 May 7, 2019
a914d9b
Quant output scale (#17215)
wzzju May 7, 2019
a40121e
fix the initialization process error. test=develop (#17213)
wzzju May 7, 2019
c2e20e2
fix build warning like 'comparison between signed and unsigned (#17240)
a6802739 May 7, 2019
a71d8fd
Softmax_cross_entropy op add axis (#16806)
heavengate May 7, 2019
ff1661f
remove unused FLAGS_warpctc_dir (#17162)
luotao1 May 7, 2019
6fafd37
fix retry_allocator (#17245)
sneaxiy May 7, 2019
71f0c6d
fix api doc of hash, relu, concat, argmin, argmax, argsoft and all ac…
tensor-tang May 7, 2019
e782b54
update sofmax with axis arg test=develop (#17190)
baojun-nervana May 7, 2019
8b62f53
fix nn.py lack of `import warnings` (#17249)
luotao1 May 7, 2019
4f85940
Enhance inplace/mem-opt pass and enhance softmax_with_cross_entropy o…
sneaxiy May 7, 2019
f2fa3f7
fix api doc,test=develop (#17241)
sneaxiy May 7, 2019
648320b
Fix some data and reader related API code (#17202)
zhhsplendid May 8, 2019
04bd413
Code Clean: Move all pass to paddle::framework::ir (#17228)
May 8, 2019
91784f8
Fix code in document. (#17237)
gongweibao May 8, 2019
dd86b40
document_preview (#17166)
tianshuo78520a May 8, 2019
8f53469
Polish Executor and Compiler doc (#17262)
May 8, 2019
65541d8
add scale pass for calculating the output scales.test=develop (#17259)
wzzju May 8, 2019
984aa90
improved unit test output (#17266)
May 8, 2019
7bd1d03
Adding lrn op for ngraph engine (#17189)
baojun-nervana May 8, 2019
7d7e299
Fix bp of roi perspective transform op. (#17216)
wanghaoshuang May 8, 2019
dab71e8
Fix api example (#17231)
ceci3 May 8, 2019
6d1d7c8
Fix the KL algorithm bug when calculated the size of tensor. (#17198)
guomingz May 8, 2019
c3195de
Fix concat shape check (#17247)
phlrain May 8, 2019
db5e74a
update assert (#17282)
May 8, 2019
6b84688
Optimize the cuda implementation of sum_op (#17283)
Xreki May 8, 2019
e388a1f
Repair api example (#17221)
junjun315 May 8, 2019
d8af44a
test=develop, fix error with training and test on diff device (#17276)
JiabinYang May 8, 2019
792443e
Refine elementwise kernel. (#16952)
zhaoyuchen2018 May 8, 2019
9ed4aaa
modified formula for Lrn (#17281)
tink2123 May 8, 2019
2c44627
Fix API example code of save_inference_model (#17274)
zhhsplendid May 8, 2019
516317c
use sync copy (#17291)
May 8, 2019
5d6a1fc
fix infer_from_dataset and train_from_dataset (#17243)
guru4elephant May 9, 2019
a88a1fa
Format file path (#17280)
junjun315 May 9, 2019
565d309
Reformat fleet API (#17135)
seiriosPlus May 9, 2019
4292bd8
Mod floordiv (#17251)
zhoukunsheng May 9, 2019
50ad904
add import, test=develop (#17229)
tink2123 May 9, 2019
7a3bb06
fix: (#17279)
NHZlX May 9, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion cmake/anakin_subgraph.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@ endif()

# Wire Anakin into the build only when ANAKIN_FOUND is set.
if(ANAKIN_FOUND)
# Print the path of the Anakin config header derived from ANAKIN_INCLUDE_DIR.
message(STATUS "Current ANAKIN header is ${ANAKIN_INCLUDE_DIR}/anakin_config.h. ")
# Add the Anakin root and its header/saber sub-directories to the include
# search path. include_directories() is directory-scoped, so these paths
# apply to targets defined in this directory and below.
include_directories(${ANAKIN_ROOT})
include_directories(${ANAKIN_ROOT}/include)
include_directories(${ANAKIN_ROOT}/include/saber)
include_directories(${ANAKIN_ROOT}/saber)
# Add the Anakin root to the linker's library search path.
link_directories(${ANAKIN_ROOT})
# Define the PADDLE_WITH_ANAKIN preprocessor symbol for compiled sources.
add_definitions(-DPADDLE_WITH_ANAKIN)
endif()
1 change: 1 addition & 0 deletions cmake/external/warpctc.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ else(WIN32)
ENDIF(WIN32)

# Log the value of WARPCTC_LIBRARIES at configure time.
MESSAGE(STATUS "warp-ctc library: ${WARPCTC_LIBRARIES}")
# Store the directory component of WARPCTC_LIBRARIES in WARPCTC_LIBRARY_PATH.
# NOTE(review): the expansion is unquoted, so this presumably expects
# WARPCTC_LIBRARIES to hold a single path without spaces — confirm.
get_filename_component(WARPCTC_LIBRARY_PATH ${WARPCTC_LIBRARIES} DIRECTORY)
INCLUDE_DIRECTORIES(${WARPCTC_INCLUDE_DIR}) # For warpctc code to include its headers.
INCLUDE_DIRECTORIES(${THIRD_PARTY_PATH}/install) # For Paddle code to include warpctc headers.

Expand Down
216 changes: 104 additions & 112 deletions paddle/fluid/API.spec

Large diffs are not rendered by default.

35 changes: 28 additions & 7 deletions paddle/fluid/framework/data_feed.cc
Original file line number Diff line number Diff line change
Expand Up @@ -455,21 +455,29 @@ void MultiSlotDataFeed::Init(
all_slots_.resize(all_slot_num);
all_slots_type_.resize(all_slot_num);
use_slots_index_.resize(all_slot_num);
total_dims_without_inductive_.resize(all_slot_num);
inductive_shape_index_.resize(all_slot_num);
use_slots_.clear();
use_slots_is_dense_.clear();
for (size_t i = 0; i < all_slot_num; ++i) {
const auto& slot = multi_slot_desc.slots(i);
all_slots_[i] = slot.name();
all_slots_type_[i] = slot.type();
use_slots_index_[i] = slot.is_used() ? use_slots_.size() : -1;
total_dims_without_inductive_[i] = 1;
inductive_shape_index_[i] = -1;
if (slot.is_used()) {
use_slots_.push_back(all_slots_[i]);
use_slots_is_dense_.push_back(slot.is_dense());
std::vector<int> local_shape;
if (slot.is_dense()) {
// for batch size holder if is_dense
if (slot.shape(0) > 0) {
local_shape.push_back(0);
for (size_t i = 0; i < slot.shape_size(); ++i) {
if (slot.shape(i) > 0) {
total_dims_without_inductive_[i] *= slot.shape(i);
}
if (slot.shape(i) == -1) {
inductive_shape_index_[i] = i;
}
}
}
for (size_t i = 0; i < slot.shape_size(); ++i) {
Expand Down Expand Up @@ -762,7 +770,10 @@ void MultiSlotDataFeed::PutToFeedVec(
LoD data_lod{offset};
feed_vec_[i]->set_lod(data_lod);
if (use_slots_is_dense_[i]) {
use_slots_shape_[i][0] = batch_size_;
if (inductive_shape_index_[i] != -1) {
use_slots_shape_[i][inductive_shape_index_[i]] =
total_instance / total_dims_without_inductive_[i];
}
feed_vec_[i]->Resize(framework::make_ddim(use_slots_shape_[i]));
}
}
Expand All @@ -785,6 +796,8 @@ void MultiSlotInMemoryDataFeed::Init(
all_slots_.resize(all_slot_num);
all_slots_type_.resize(all_slot_num);
use_slots_index_.resize(all_slot_num);
total_dims_without_inductive_.resize(all_slot_num);
inductive_shape_index_.resize(all_slot_num);
use_slots_.clear();
use_slots_is_dense_.clear();
for (size_t i = 0; i < all_slot_num; ++i) {
Expand All @@ -797,8 +810,13 @@ void MultiSlotInMemoryDataFeed::Init(
use_slots_is_dense_.push_back(slot.is_dense());
std::vector<int> local_shape;
if (slot.is_dense()) {
if (slot.shape(0) > 0) {
local_shape.push_back(0);
for (size_t i = 0; i < slot.shape_size(); ++i) {
if (slot.shape(i) > 0) {
total_dims_without_inductive_[i] *= slot.shape(i);
}
if (slot.shape(i) == -1) {
inductive_shape_index_[i] = i;
}
}
}
for (size_t i = 0; i < slot.shape_size(); ++i) {
Expand Down Expand Up @@ -960,7 +978,10 @@ void MultiSlotInMemoryDataFeed::PutToFeedVec(
LoD data_lod{offset};
feed_vec_[i]->set_lod(data_lod);
if (use_slots_is_dense_[i]) {
use_slots_shape_[i][0] = batch_size_;
if (inductive_shape_index_[i] != -1) {
use_slots_shape_[i][inductive_shape_index_[i]] =
total_instance / total_dims_without_inductive_[i];
}
feed_vec_[i]->Resize(framework::make_ddim(use_slots_shape_[i]));
}
}
Expand Down
2 changes: 2 additions & 0 deletions paddle/fluid/framework/data_feed.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,8 @@ class DataFeed {
std::vector<std::string> all_slots_;
std::vector<std::string> all_slots_type_;
std::vector<std::vector<int>> use_slots_shape_;
std::vector<int> inductive_shape_index_;
std::vector<int> total_dims_without_inductive_;
std::vector<int>
use_slots_index_; // -1: not used; >=0: the index of use_slots_

Expand Down
10 changes: 10 additions & 0 deletions paddle/fluid/framework/ddim.cc
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,16 @@ int64_t product(const DDim& ddim) {
return ddim.apply_visitor(ProductVisitor());
}

// Returns true when at least one entry of `ddim` is negative, and false
// otherwise (including when `ddim` has no entries). Judging by the function
// name, a negative entry presumably marks a dimension whose size is not yet
// known -- confirm against callers.
bool contain_unknown_dim(const DDim& ddim) {
const auto rank = ddim.size();
int axis = 0;
// Advance past every non-negative entry; stop at the first negative one.
while (axis < rank && ddim[axis] >= 0) {
++axis;
}
// Stopping before the end means a negative entry was found.
return axis < rank;
}

DDim slice_ddim(const DDim& dim, int begin, int end) {
PADDLE_ENFORCE(begin >= 0 && end <= dim.size(),
"[begin(%d), end(%d)) must be inside [0, %d) in ddim slice.",
Expand Down
2 changes: 2 additions & 0 deletions paddle/fluid/framework/ddim.h
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,8 @@ std::vector<int> vectorize2int(const DDim& ddim);

int64_t product(const DDim& ddim);

/**
 * \brief Check whether any dimension of a ddim is negative
 *
 * \param ddim The ddim to inspect
 * \return true if at least one dimension is less than zero, false otherwise
 */
bool contain_unknown_dim(const DDim& ddim);

/**
* \brief Slice a ddim
*
Expand Down
37 changes: 2 additions & 35 deletions paddle/fluid/framework/details/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,22 +1,12 @@
cc_library(var_handle SRCS var_handle.cc DEPS place framework_proto node)
cc_library(op_handle_base SRCS op_handle_base.cc DEPS var_handle device_context lod_tensor)
cc_library(op_graph_view SRCS op_graph_view.cc DEPS op_handle_base)

cc_library(scale_loss_grad_op_handle SRCS scale_loss_grad_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory)
cc_library(fetch_op_handle SRCS fetch_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory)
cc_library(computation_op_handle SRCS computation_op_handle.cc DEPS framework_proto scope place operator op_registry)
cc_library(rpc_op_handle SRCS rpc_op_handle.cc DEPS framework_proto scope place operator op_registry)
cc_library(fetch_barrier_op_handle SRCS fetch_barrier_op_handle.cc DEPS framework_proto scope place operator op_registry)

cc_library(multi_devices_helper SRCS multi_devices_helper.cc DEPS graph graph_helper)
cc_library(multi_devices_graph_print_pass SRCS multi_devices_graph_print_pass.cc DEPS multi_devices_helper)
cc_library(multi_devices_graph_check_pass SRCS multi_devices_graph_check_pass.cc DEPS multi_devices_helper)

cc_library(alloc_continuous_space_for_grad_pass SRCS alloc_continuous_space_for_grad_pass.cc DEPS graph graph_helper)
cc_library(fuse_adam_op_pass SRCS fuse_adam_op_pass.cc fuse_optimizer_op_pass.cc DEPS graph graph_helper)
cc_library(fuse_sgd_op_pass SRCS fuse_sgd_op_pass.cc fuse_optimizer_op_pass.cc DEPS graph graph_helper)
cc_library(fuse_momentum_op_pass SRCS fuse_momentum_op_pass.cc fuse_optimizer_op_pass.cc DEPS graph graph_helper)

cc_library(record_skip_memory_opt_vars_pass SRCS record_skip_memory_opt_vars_pass.cc DEPS graph graph_helper)

cc_library(variable_visitor SRCS variable_visitor.cc DEPS lod_tensor selected_rows)

Expand All @@ -27,7 +17,7 @@ if(WITH_DISTRIBUTE)
endif()
endif()

set(all_reduce_deps all_reduce_op_handle)

if(WITH_GPU)
nv_library(all_reduce_op_handle SRCS all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
dynload_cuda variable_visitor)
Expand All @@ -37,7 +27,6 @@ if(WITH_GPU)
if(WITH_DGC)
nv_library(sparse_all_reduce_op_handle SRCS sparse_all_reduce_op_handle.cc DEPS op_handle_base scope
lod_tensor ddim memory dynload_cuda variable_visitor dgc all_reduce_op_handle)
set(all_reduce_deps sparse_all_reduce_op_handle)
endif()

if(WITH_DISTRIBUTE)
Expand Down Expand Up @@ -68,34 +57,12 @@ endif()

cc_library(gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor)

if(WITH_GPU)
cc_library(memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper gpu_info)
else()
cc_library(memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper cpu_info)
endif()

cc_library(memory_optimize_pass SRCS memory_optimize_pass.cc DEPS memory_optimize_helper pass)
cc_library(inplace_op_pass SRCS inplace_op_pass.cc DEPS memory_optimize_pass op_info)
cc_library(modify_op_lock_and_record_event_pass SRCS modify_op_lock_and_record_event_pass.cc DEPS computation_op_handle op_graph_view multi_devices_helper)
cc_library(reference_count_pass_helper SRCS reference_count_pass_helper.cc DEPS garbage_collector computation_op_handle)
cc_library(eager_deletion_op_handle SRCS eager_deletion_op_handle.cc DEPS lod_tensor selected_rows reference_count_pass_helper)
cc_library(while_op_eager_deletion_pass SRCS while_op_eager_deletion_pass.cc DEPS while_op_helper graph_helper pass computation_op_handle)
cc_library(eager_deletion_pass SRCS eager_deletion_pass.cc DEPS computation_op_handle eager_deletion_op_handle graph graph_helper pass while_op_eager_deletion_pass)
cc_library(reference_count_pass SRCS reference_count_pass.cc DEPS computation_op_handle graph graph_helper pass op_graph_view reference_count_pass_helper)

cc_library(sequential_execution_pass SRCS sequential_execution_pass.cc DEPS graph graph_helper pass)
cc_library(all_reduce_deps_pass SRCS all_reduce_deps_pass.cc DEPS graph graph_helper pass)

cc_library(multi_devices_graph_pass SRCS multi_devices_graph_pass.cc DEPS multi_devices_helper computation_op_handle
scale_loss_grad_op_handle rpc_op_handle fetch_barrier_op_handle ${all_reduce_deps} reduce_op_handle broadcast_op_handle fused_broadcast_op_handle)

cc_library(fuse_all_reduce_op_pass SRCS fuse_all_reduce_op_pass.cc DEPS graph graph_helper fused_all_reduce_op_handle)

set(SSA_GRAPH_EXECUTOR_DEPS graph framework_proto sequential_execution_pass modify_op_lock_and_record_event_pass all_reduce_deps_pass reference_count_pass eager_deletion_pass memory_optimize_pass inplace_op_pass)
if (WITH_GPU)
list(APPEND SSA_GRAPH_EXECUTOR_DEPS reference_count_pass)
endif()
cc_test(memory_optimize_helper_test SRCS memory_optimize_helper_test.cc memory_optimize_helper.cc DEPS framework_proto graph graph_helper op_registry)
cc_library(ssa_graph_executor SRCS ssa_graph_executor.cc DEPS ${SSA_GRAPH_EXECUTOR_DEPS})

cc_library(threaded_ssa_graph_executor SRCS threaded_ssa_graph_executor.cc DEPS fetch_op_handle ssa_graph_executor scope
Expand Down
Loading