Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
645 commits
Select commit Hold shift + click to select a range
98dfb49
Release GIL lock
velconia Mar 14, 2019
84c0054
Make timeline_py compatible (#16185)
Mar 14, 2019
b2898c0
Merge branch 'develop' into runtime_context
luotao1 Mar 14, 2019
0b49e43
Merge pull request #16144 from sneaxiy/rnn_mem_opt
sneaxiy Mar 14, 2019
7355d41
1. Add imperative gperf profiler
velconia Mar 14, 2019
f837394
Polish code
velconia Mar 14, 2019
b9252f3
Add cpu_quantize_squash_pass for C-API quantization (#16128)
Mar 14, 2019
1c6caf8
1. disable reuse SELECTED_ROWS type variable (#16150)
liupluswei Mar 14, 2019
a91964c
Revert "PaddingRNN model memory optimize"
sneaxiy Mar 14, 2019
e993eff
include unordered_map to cross_entropy_op.cc
sneaxiy Mar 14, 2019
55ba7f6
fix numeric error
sneaxiy Mar 14, 2019
79df026
Merge pull request #16208 from PaddlePaddle/revert-16144-rnn_mem_opt
sneaxiy Mar 14, 2019
ad5f0e6
merge develop
sneaxiy Mar 14, 2019
5a92e4c
revert revert 16144
sneaxiy Mar 14, 2019
3e03695
fix numeric error
sneaxiy Mar 14, 2019
0ca6465
Merge pull request #16210 from sneaxiy/rnn_mem_opt
sneaxiy Mar 15, 2019
a59b7d4
improve layers.fc api doc test=develop
Aurelius84 Mar 15, 2019
e5e7628
Skip compile infer shape in box_coder_op
qingqing01 Mar 15, 2019
f0d108f
fix const_cast
sneaxiy Mar 15, 2019
4ae23cc
Impl fp16 compute kernel for slice_op (#16206)
Mar 15, 2019
ca392c7
Implement infer var type context
velconia Mar 15, 2019
98d9552
update sqrt explaination, test=develop
shippingwang Mar 14, 2019
8ad672a
Support sync batch norm. (#16121)
qingqing01 Mar 15, 2019
ede33c6
fix formula in dropout
ceci3 Mar 15, 2019
cd82e2b
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
ceci3 Mar 15, 2019
6cfd20d
fix words spell error test=develop
Aurelius84 Mar 15, 2019
6ce25c9
Merge branch 'develop' into runtime_context
luotao1 Mar 15, 2019
2d1e76f
fix API.spec test=develop
Aurelius84 Mar 15, 2019
1b59bed
Merge branch 'develop' into runtime_context
luotao1 Mar 15, 2019
46ee6bb
fix distributed unit-tests
luotao1 Mar 15, 2019
92b9ce3
Merge pull request #16073 from heavengate/yolov3_loss_imporve
panyx0718 Mar 15, 2019
74037cc
Merge branch 'develop' into yolo_box
heavengate Mar 15, 2019
721c2c0
refine fc_infershape
luotao1 Mar 15, 2019
81b4fad
add moving average absmax op and fix bug (#15155)
achao2013 Mar 15, 2019
c49e604
Merge pull request #16213 from qingqing01/compile_infer_shape
NHZlX Mar 15, 2019
cd906fc
change api.spec
ceci3 Mar 15, 2019
5ecdc49
set enable_runtime_context_cache_ default false
luotao1 Mar 15, 2019
50ff898
graph neural network for imperative mode
panyx0718 Mar 15, 2019
438bca9
Implement Runtime Var Type Inference
velconia Mar 15, 2019
9041b23
Polish code
velconia Mar 15, 2019
3be7e97
polish
panyx0718 Mar 15, 2019
b5078c2
Make infer var type virtual
velconia Mar 15, 2019
c0ddb93
Polish code
velconia Mar 15, 2019
3622537
Polish code
velconia Mar 15, 2019
b77ebb2
Merge pull request #15919 from heavengate/yolo_box
heavengate Mar 15, 2019
db0c970
Polish code
velconia Mar 15, 2019
38898c2
Merge pull request #16212 from Aurelius84/develop
luotao1 Mar 15, 2019
4b073c9
fix compiler
sneaxiy Mar 15, 2019
86e912c
Fix windows compiling (#16230)
qingqing01 Mar 16, 2019
efca4de
Fix cross_entropy bug (#16236)
Mar 17, 2019
7458114
Merge pull request #16228 from panyx0718/imperative
panyx0718 Mar 18, 2019
2579ade
Add cpu_quantize_pass for C-API quantization (#16127)
Mar 18, 2019
a275fd6
Merge branch 'develop' into runtime_context
luotao1 Mar 18, 2019
9d43597
not use PERSISTENT in batch_norm. test=develop
heavengate Mar 18, 2019
cc0ae1f
refine with comments
luotao1 Mar 18, 2019
6fa52f8
Merge branch 'develop' into fc_infershape
luotao1 Mar 18, 2019
d9f0e72
refine with comments
luotao1 Mar 18, 2019
3e9319f
add more imperative layer tests.
panyx0718 Mar 18, 2019
e89406a
Merge pull request #11 from PaddlePaddle/develop
junjun315 Mar 18, 2019
36dce65
Take DataType and VarType apart
velconia Mar 18, 2019
8e4ad00
Merge pull request #16198 from velconia/imperative_train_speed
velconia Mar 18, 2019
50931de
refine seq enum op
tensor-tang Mar 18, 2019
a7fe3b5
fix concat; test=develop
phlrain Mar 18, 2019
955fad7
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Mar 18, 2019
dcba2e7
fix conncat; test=develop
phlrain Mar 18, 2019
a21fdde
fix lod reset op;test=develop
phlrain Mar 18, 2019
8ea4218
update load persistables for increment, test=develop (#15576)
seiriosPlus Mar 18, 2019
152beec
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Mar 18, 2019
802b334
remove resize then seq num == 1; test=develop
phlrain Mar 18, 2019
10ececb
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Mar 18, 2019
e46a94a
fix lod_rest; test=develop
phlrain Mar 18, 2019
c072998
Merge pull request #16219 from luotao1/fc_infershape
luotao1 Mar 18, 2019
96c54cd
Update lod_reset_op.cc
phlrain Mar 18, 2019
374abcf
Merge pull request #16247 from panyx0718/imperative
panyx0718 Mar 18, 2019
58c69a9
Update lod_reset_op.cc
phlrain Mar 18, 2019
e818fa1
Enable INT8 transpose kernel for MobileNet-SSD improvement. (#16159)
xiaolil1 Mar 18, 2019
9874530
Update lod_reset_op.cc
phlrain Mar 18, 2019
b40e41f
Polish code style
velconia Mar 18, 2019
6429d2a
Merge pull request #16188 from sneaxiy/fix_const_cast
sneaxiy Mar 18, 2019
161b8dd
Merge develop
sneaxiy Mar 18, 2019
8364688
Fix py_func_op's problem
velconia Mar 18, 2019
565b19b
fix set data type bug
velconia Mar 18, 2019
4f42504
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
shippingwang Mar 19, 2019
dbb92ee
Merge pull request #16002 from luotao1/runtime_context
luotao1 Mar 19, 2019
af03008
Add cpu_quantize_placement_pass for C-API quantization (#16265)
Mar 19, 2019
97c6051
add api.spec, test=develop
shippingwang Mar 19, 2019
7e20e76
Fix the bug in fp16 backward kernel (#16269)
Mar 19, 2019
13816dd
[MKL-DNN] Fix to crash of Transformer when mkldnn is to be used (#16233)
jczaja Mar 19, 2019
3a09693
change API name
sneaxiy Mar 18, 2019
f8df9eb
fix api doc (#16201)
sneaxiy Mar 19, 2019
c7f1f3e
Merge pull request #16214 from velconia/imperative_infer_var_type
velconia Mar 19, 2019
3f5f5ed
fix dropout doc
ceci3 Mar 19, 2019
27f7a72
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
ceci3 Mar 19, 2019
ead558b
Merge pull request #16256 from tensor-tang/refine/seqenum
tensor-tang Mar 19, 2019
9d2ccec
test=develop, fix doc
ceci3 Mar 19, 2019
c79cdf2
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
ceci3 Mar 19, 2019
7d2740d
Revert "cache runtime_context"
luotao1 Mar 19, 2019
d3acf68
Merge pull request #16258 from phlrain/fix_concat_1
phlrain Mar 19, 2019
c05af91
refine cos_sim infershape
luotao1 Mar 19, 2019
2271548
add allocator flags
zhhsplendid Mar 19, 2019
18911b6
[enhence] Make step_input of dynamic_rnn support custom lod level. (#…
wanghaoshuang Mar 19, 2019
b9fc80a
Merge pull request #16287 from PaddlePaddle/revert-16002-runtime_context
luotao1 Mar 19, 2019
ec88b6c
add channel wise quantization in ir pass.
wzzju Mar 19, 2019
82af803
add runtime_context_cache_pass
luotao1 Mar 19, 2019
023a3a3
fix op grad maker
sneaxiy Mar 19, 2019
5bb04ea
Merge pull request #12 from PaddlePaddle/develop
junjun315 Mar 19, 2019
bed0ecf
checkpoint pr be moved here, test=develop
junjun315 Mar 19, 2019
6382b62
Collective ops (#15572)
typhoonzero Mar 20, 2019
104a9f1
fix pattern maching conv2d with(out) ResidualData
Mar 20, 2019
9195c3b
Merge pull request #16280 from luotao1/cos_sim_infershape
luotao1 Mar 20, 2019
804afc5
Minor ngraph fix (#16270)
baojun-nervana Mar 20, 2019
d0ef682
Merge pull request #16274 from sneaxiy/fix_grad_maker
sneaxiy Mar 20, 2019
f26ba5b
Fuse AllReduce (#15921)
Mar 20, 2019
aba2713
fix comment. test=develop
heavengate Mar 20, 2019
1580be5
fix sequence pad; test=develop
phlrain Mar 20, 2019
dd080b1
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Mar 20, 2019
a3b8028
Merge pull request #16202 from shippingwang/add_sqrt_doc
shippingwang Mar 20, 2019
bb166a1
fix API.spec
sneaxiy Mar 19, 2019
2de263a
Add softmax_with_cross_entropy_op to ngraph engine (#16304)
baojun-nervana Mar 20, 2019
622fe6a
checkpoint pr be moved here, test=develop
junjun315 Mar 20, 2019
a5124ee
Merge pull request #16301 from luotao1/runtime_context_pass
luotao1 Mar 20, 2019
d0ce6a9
fix anakin converter registry (#15993)
Superjomn Mar 1, 2019
be523ba
Add anakin conv2d/relu/sigmoid/tanh converter (#15997)
fc500110 Mar 1, 2019
084310f
paddle-anakin: concat, split, pool2d converter#16003
NHZlX Mar 20, 2019
b21770a
cherry-pick from feature/anakin-engine: Add subgraph fuse support and…
NHZlX Mar 20, 2019
0945b97
cherry-pick feature/anakin-engine: add anakin softmax/transpose/batch…
fc500110 Mar 6, 2019
a32d420
cherry-pick from feature/anakin-engine: batch norm (#16110)
fc500110 Mar 7, 2019
a1d200a
cherry-pick from feature/anakin-engine: Anakin support facebox #16111
NHZlX Mar 20, 2019
69d37f8
cherry-pick from feature/anakin-engine: refine anakin subgraph. #16157
NHZlX Mar 20, 2019
c79f06d
cherry-pick from feature/anakin-engine: add batch interface for pd-an…
NHZlX Mar 20, 2019
a25331b
cherry-pick from feature/anakin-engine: deal the changing shape when …
NHZlX Mar 20, 2019
c407dfa
cherry-pick from feature/anakin-engine: refine paddle-anakin to new i…
NHZlX Mar 20, 2019
07dcf28
git cherry-pick from feature/anakin-engine: update anakin subgraph #1…
NHZlX Mar 20, 2019
4f4daa4
cherry-pick from feature/anakin-engine: add data type for zero copy #…
NHZlX Mar 20, 2019
957ea99
Merge pull request #16243 from heavengate/batch_norm_not_persistent
heavengate Mar 20, 2019
08838f3
Fix save inference model bug (#16242)
fc500110 Mar 20, 2019
8caa785
Enhance affine_channel_op infer-shape check (#16317)
qingqing01 Mar 20, 2019
9a05859
Merge pull request #16322 from wojtuss/wojtuss/fix_cpu_quantize_pass
luotao1 Mar 20, 2019
09442fb
checkpoint pr be moved here, test=develop
junjun315 Mar 20, 2019
cbe2dbf
Add enabling quantization (#16326)
Mar 21, 2019
8bebfe5
add resnet nccl2 dist training, mp training unit test (#16167)
typhoonzero Mar 21, 2019
8965819
rewrite the cuda kernels of channel_wise_quant_op and channe_wise_deq…
wzzju Mar 21, 2019
124f1df
Add flags for init and re-alloc gpu
zhhsplendid Mar 19, 2019
09e05a1
Merge pull request #16217 from ceci3/doc
shippingwang Mar 21, 2019
df5d19a
temoprarily disable the code of use kCUDNN, test=develop (#16205)
liupluswei Mar 21, 2019
b7baeed
fix win gpu build test=develop (#16334)
typhoonzero Mar 21, 2019
56c2d38
add elementwise floordiv, mod; test=develop
phlrain Mar 21, 2019
0e40298
fix matmul shape check; test=develop
phlrain Mar 21, 2019
be6e78b
test=develop;add approval RD check api
Mar 21, 2019
190cfd6
fix squeeze shape check; test=develop
phlrain Mar 21, 2019
e7fb344
fix squeeze op shape check; test=develop
phlrain Mar 21, 2019
a859bfc
test=develop
Mar 21, 2019
da39a70
Merge pull request #16350 from tianshuo78520a/approval_RD_api
panyx0718 Mar 21, 2019
d96f498
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Mar 21, 2019
6a5545a
fix squeeze shape check; test=develop
phlrain Mar 21, 2019
3b46680
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Mar 21, 2019
b3b7e35
Merge pull request #16259 from phlrain/fix_lod_reset
phlrain Mar 21, 2019
294cdf6
Merge pull request #16177 from fc500110/remove_visualizer
luotao1 Mar 21, 2019
249546b
fix concat shape check; test=develop
phlrain Mar 21, 2019
b3d0336
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Mar 21, 2019
d2b938e
Refine gradient proto maker and python API for affine_channel_op (#16…
qingqing01 Mar 21, 2019
5670530
Revert "not use PERSISTENT in batch_norm. test=develop" (#16333)
heavengate Mar 21, 2019
ac32bf6
update input params type, test=develop
junjun315 Mar 21, 2019
4e3b5e7
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Mar 21, 2019
8c81d99
Merge pull request #16347 from phlrain/fix_matmul_check
phlrain Mar 21, 2019
0d779f1
Merge pull request #16261 from phlrain/fix_sequence_pad_2
phlrain Mar 21, 2019
8274d9d
fix concat shape check; test=develop
phlrain Mar 21, 2019
ff11281
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Mar 21, 2019
18d107c
add floordiv and mod op; test=develop
phlrain Mar 21, 2019
686b893
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Mar 21, 2019
5dc9b51
fix time; test=develop
phlrain Mar 21, 2019
1544443
Merge pull request #16348 from phlrain/fix_squeeze_check
phlrain Mar 21, 2019
18a0f6d
Merge pull request #16351 from phlrain/fix_topk_shape_check
phlrain Mar 22, 2019
3396552
Add unit test for fuse all reduce (#16354)
Mar 22, 2019
090d25f
test=develop;fix docker build failed
tianshuo78520a Mar 22, 2019
77a0875
add var name in optimizer; test=develop
phlrain Mar 22, 2019
f3a2e4b
1. Add ANAKIN_ROOT compile option
NHZlX Mar 22, 2019
3df7b98
Merge branch 'develop' of https://github.com/paddlepaddle/paddle into…
NHZlX Mar 22, 2019
a6a3b2f
[Speed]Refine ParallelExecutor (#16190)
Mar 22, 2019
e5478ab
Merge pull request #16346 from phlrain/add_floordiv_and_mod
phlrain Mar 22, 2019
e3bca90
Merge pull request #16357 from phlrain/fix_concat_check
phlrain Mar 22, 2019
171df5b
Merge pull request #16303 from junjun315/checkpoint
velconia Mar 22, 2019
e235882
Enable MKL-DNN INT8 Concat Kernel. (#16156)
xiaolil1 Mar 22, 2019
ec11135
Merge pull request #16341 from wzzju/add_channel_wise_in_quant_pass
wzzju Mar 22, 2019
86511f5
Merge pull request #13 from PaddlePaddle/develop
junjun315 Mar 22, 2019
55a7b98
Add DeepCF model
panyx0718 Mar 22, 2019
dbc6579
Merge pull request #16378 from tianshuo78520a/fix_dockerfile
luotao1 Mar 22, 2019
d11d0e1
remove test_dist_transplier; test=develop
phlrain Mar 22, 2019
7dc4a7f
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Mar 22, 2019
6b971e1
remove test_dist_transplier; test=develop
phlrain Mar 22, 2019
5d6737b
Fix bug in affine_channel API (#16373)
qingqing01 Mar 22, 2019
466e150
Merge pull request #16380 from phlrain/add_var_name_in_opt_2
phlrain Mar 22, 2019
18779b5
[Operator] Add range op. (#15431)
wanghaoshuang Mar 22, 2019
2e5831f
[slim] Refine framework of slim and add filter pruning strategy (#16226)
wanghaoshuang Mar 23, 2019
431068c
Enhance test calibration script on accuracy assert
chuanqi129 Mar 23, 2019
c917c13
increase the time limite (#16405)
Mar 23, 2019
f906179
Merge pull request #16407 from chuanqi129/test_calibration_enhance
luotao1 Mar 23, 2019
b3f5876
Merge pull request #14 from PaddlePaddle/develop
junjun315 Mar 24, 2019
a93a9ee
add op registry type
sneaxiy Mar 22, 2019
072d95d
Merge develop
sneaxiy Mar 24, 2019
2f54d9f
Merge develop
sneaxiy Mar 20, 2019
a1d11bb
fix ci bug: cudnn handler in multi card
NHZlX Mar 25, 2019
1b6a2a0
fix mix input type error, test=develop
junjun315 Mar 25, 2019
b55dd32
Merge pull request #16394 from panyx0718/imperative2
panyx0718 Mar 25, 2019
c64d959
Merge pull request #16295 from zhhsplendid/zhenghuihuang-dev-2
sneaxiy Mar 25, 2019
46677fb
Move cpu_quantize_* passes into mkldnn subfolder
Mar 25, 2019
18aa594
fix mix input type error, test=develop
junjun315 Mar 25, 2019
f8ed2c2
try to fix ci error
sneaxiy Mar 24, 2019
33cb9d0
fix 404, test=develop
shanyi15 Mar 25, 2019
bc4d1c7
fix mix input type error, test=develop
junjun315 Mar 25, 2019
de3b70a
fix cdn issue, test=develop (#16423)
liupluswei Mar 25, 2019
2ccbfd5
Fix some bugs for quantization passes.
wzzju Mar 25, 2019
e9bec93
[slim] Add quantization strategy and distillation strategy. (#16408)
wanghaoshuang Mar 25, 2019
4cc9809
Merge pull request #15799 from sneaxiy/feature/decoupled_reader
sneaxiy Mar 26, 2019
85e1cc1
Update Readme with new accuracy and performance data measured on 6271…
chuanqi129 Mar 26, 2019
becf799
fix
panyx0718 Mar 26, 2019
0fff666
Merge pull request #16449 from panyx0718/imperative3
panyx0718 Mar 26, 2019
45b3766
fix comments
NHZlX Mar 26, 2019
7000ec8
fix some op grad maker
sneaxiy Mar 25, 2019
a7d0ac5
Merge develop
sneaxiy Mar 26, 2019
1f89249
update DeepCF model
panyx0718 Mar 26, 2019
fd24ab4
polish
panyx0718 Mar 26, 2019
f735102
add layer norm to Layers, add transformer test in imperative mode (#1…
JiabinYang Mar 26, 2019
7c5319b
Fix/test imperative ptb rnn (#16433)
JiabinYang Mar 26, 2019
1c11f81
Use the resolve hazard method.
wzzju Mar 26, 2019
27d0520
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
wzzju Mar 26, 2019
c512516
Update INT8 calibration README
chuanqi129 Mar 26, 2019
78fb3a6
fix env variable settting bug
sneaxiy Mar 26, 2019
ec6519e
Fix allreducedep bug (#16443)
gongweibao Mar 26, 2019
e0a3a49
Merge pull request #16438 from wojtuss/wojtuss/move-cpu-quantize-passes
luotao1 Mar 26, 2019
953bdde
Merge branch 'develop' of https://github.com/paddlepaddle/paddle into…
NHZlX Mar 26, 2019
4f2278f
Add doc for CPUPlace CUDAPlace CUDAPinPlace (#16442)
Mar 26, 2019
9ffd5ee
test fix fetch bar place for ce (#16406)
typhoonzero Mar 27, 2019
c34b24e
Merge pull request #16425 from junjun315/checkpoint-hotfix
junjun315 Mar 27, 2019
98802e1
Optimize the implementation of while_op again, for cases when is_test…
Xreki Mar 27, 2019
1eff834
update jitkernel doc (#16327)
tensor-tang Mar 27, 2019
f0070d9
Merge pull request #16436 from PaddlePaddle/shanyi15-patch-1
panyx0718 Mar 27, 2019
d68a02a
Merge pull request #16456 from wzzju/fix_quan_hang
wzzju Mar 27, 2019
a0f4fef
delete source file no_need_buffer_vars_inference.cc
sneaxiy Mar 27, 2019
fa1796a
Merge pull request #16330 from NHZlX/merge_anakin_branch_to_dev
NHZlX Mar 27, 2019
f8c279b
Merge pull request #16454 from panyx0718/imperative2
panyx0718 Mar 27, 2019
8d22bc1
Memory optimize (#16410)
liupluswei Mar 27, 2019
1b4e4e7
Merge pull request #16453 from chuanqi129/calibration_readme_refine
luotao1 Mar 27, 2019
850b737
Fix nparray.all() bug. (#16472)
gongweibao Mar 27, 2019
0d9d25d
Feature/refactor layers to Layers (#16337)
JiabinYang Mar 27, 2019
c300b1b
Tensor index (#16223)
Mar 27, 2019
9993651
Add from six.moves import reduce (#16435)
Mar 27, 2019
54a7357
Feature/install check (#16044)
JiabinYang Mar 27, 2019
c7c6eeb
Merge pull request #16409 from sneaxiy/feature/advance_gc
sneaxiy Mar 27, 2019
57dc3c1
Disable compare for Issue#16316 (#16466)
yihuaxu Mar 27, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER}, version: "
"${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: "
"${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
message(STATUS "AR tools: ${CMAKE_AR}")

if(WIN32)
set(CMAKE_SUPPRESS_REGENERATION ON)
set(CMAKE_STATIC_LIBRARY_PREFIX lib)
Expand Down Expand Up @@ -62,6 +64,7 @@ option(WITH_DISTRIBUTE "Compile with distributed support" OFF)
option(WITH_PSLIB "Compile with pslib support" OFF)
option(WITH_CONTRIB "Compile the third-party contributation" OFF)
option(REPLACE_ENFORCE_GLOG "Replace PADDLE_ENFORCE with glog/CHECK for better debug." OFF)
# TODO(Superjomn) Remove WITH_ANAKIN option if not needed later.
option(WITH_ANAKIN "Compile with Anakin library" OFF)
option(ANAKIN_BUILD_FAT_BIN "Build anakin cuda fat-bin lib for all device plantform, ignored when WITH_ANAKIN=OFF" OFF)
option(ANAKIN_BUILD_CROSS_PLANTFORM "Build anakin lib for any nvidia device plantform. ignored when WITH_ANAKIN=OFF" ON)
Expand Down Expand Up @@ -188,6 +191,7 @@ include(configure) # add paddle env configuration
if(WITH_GPU)
include(cuda)
include(tensorrt)
include(anakin_subgraph)
endif()
if(WITH_MKL OR WITH_MKLML)
include(external/anakin)
Expand Down
8 changes: 4 additions & 4 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ python \

This will enable VLOG messages generated by `buddy_allocator.{h,cc}` in the verbose range of 0 to 3, so you will see the example VLOG message above, which is at level 3. This suggests that we should output high-level messages at lower verbose levels, so that they are displayed with higher probability. When coding in C++, please follow the verbose level convention below:

- verbose level 1: [framework](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/framework)
- verbose level 3: [operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators)
- verbose level 5: [memory](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/memory), [platform](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/platform)
- verbose level 7: [math](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/legacy/math)
- verbose level 1: [framework](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/fluid/framework)
- verbose level 3: [operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/fluid/operators)
- verbose level 5: [memory](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/fluid/memory), [platform](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/fluid/platform)
- verbose level 7: [math](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/fluid/operators/math/)
5 changes: 3 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,9 @@ RUN curl -s -q https://glide.sh/get | sh
# and its size is only one-third of the official one.
# 2. Manually add ~IPluginFactory() in IPluginFactory class of NvInfer.h, otherwise, it couldn't work in paddle.
# See https://github.com/PaddlePaddle/Paddle/issues/10129 for details.
RUN wget -qO- http://paddlepaddledeps.cdn.bcebos.com/TensorRT-4.0.0.3.Ubuntu-16.04.4.x86_64-gnu.cuda-8.0.cudnn7.0.tar.gz | \
tar -xz -C /usr/local && \

RUN wget -q https://paddlepaddledeps.cdn.bcebos.com/TensorRT-4.0.1.6-ubuntu14.04.x86_64-gnu.cuda.8.0.cudnn7.0.tar.gz --no-check-certificate && \
tar -zxf TensorRT-4.0.1.6-ubuntu14.04.x86_64-gnu.cuda.8.0.cudnn7.0.tar.gz -C /usr/local && \
cp -rf /usr/local/TensorRT/include /usr && \
cp -rf /usr/local/TensorRT/lib /usr

Expand Down
1 change: 0 additions & 1 deletion benchmark/fluid/fluid_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,6 @@ def train_parallel(train_args, test_args, args, train_prog, test_prog,
else:
build_strategy.reduce_strategy = fluid.BuildStrategy(
).ReduceStrategy.AllReduce
build_strategy.fuse_broadcast_op = args.fuse_broadcast_op

avg_loss = train_args[0]

Expand Down
32 changes: 32 additions & 0 deletions cmake/anakin_subgraph.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Locate a pre-built Anakin library and, when it is usable, expose its headers
# and libraries to the rest of the build.
#
# Inputs:
#   WITH_GPU    - this module does nothing unless it is enabled.
#   WITH_DSO    - ANAKIN_FOUND is only switched ON when it is enabled.
#   ANAKIN_ROOT - cache path (default "/usr"); the ANAKIN_ROOT environment
#                 variable is searched as well.
# Outputs:
#   ANAKIN_INCLUDE_DIR - directory that contains anakin_config.h.
#   ANAKIN_LIBRARY     - full path of the Anakin shared library.
#   ANAKIN_LIBRARY_DIR - directory that contains ANAKIN_LIBRARY (set only
#                        when ANAKIN_FOUND is true).
#   ANAKIN_FOUND       - ON when headers and library exist and WITH_DSO is on.
# Side effects when found: directory-scoped include/link paths are extended
# and -DPADDLE_WITH_ANAKIN is added to the compile definitions.
if(NOT WITH_GPU)
  return()
endif()

set(ANAKIN_ROOT "/usr" CACHE PATH "ANAKIN ROOT")

# NO_DEFAULT_PATH restricts the search to the locations listed here.
find_path(ANAKIN_INCLUDE_DIR anakin_config.h
  PATHS ${ANAKIN_ROOT} ${ANAKIN_ROOT}/include
        $ENV{ANAKIN_ROOT} $ENV{ANAKIN_ROOT}/include
  NO_DEFAULT_PATH
)

find_library(ANAKIN_LIBRARY NAMES libanakin_saber_common.so libanakin.so
  PATHS ${ANAKIN_ROOT}
        $ENV{ANAKIN_ROOT} $ENV{ANAKIN_ROOT}/lib
  NO_DEFAULT_PATH
  DOC "Path to ANAKIN library.")

if(ANAKIN_INCLUDE_DIR AND ANAKIN_LIBRARY)
  # When WITH_DSO is off, ANAKIN_FOUND is deliberately left untouched (it
  # evaluates to false in the check below unless it was set elsewhere).
  if(WITH_DSO)
    set(ANAKIN_FOUND ON)
  endif()
else()
  set(ANAKIN_FOUND OFF)
endif()

if(ANAKIN_FOUND)
  message(STATUS "Current ANAKIN header is ${ANAKIN_INCLUDE_DIR}/anakin_config.h. ")

  # Original search paths derived from the ANAKIN_ROOT cache variable.
  include_directories(${ANAKIN_ROOT}/include)
  include_directories(${ANAKIN_ROOT}/include/saber)
  link_directories(${ANAKIN_ROOT})

  # The header and library may have been located through $ENV{ANAKIN_ROOT}
  # rather than the cache variable, so also register the directories that
  # were actually discovered; otherwise such an install is reported as found
  # but never reaches the compiler or linker search paths.
  # NOTE(review): the "saber" sub-directory mirrors the layout assumed by the
  # ${ANAKIN_ROOT}/include/saber entry above - confirm against the Anakin
  # install tree.
  get_filename_component(ANAKIN_LIBRARY_DIR "${ANAKIN_LIBRARY}" DIRECTORY)
  include_directories(${ANAKIN_INCLUDE_DIR})
  include_directories(${ANAKIN_INCLUDE_DIR}/saber)
  link_directories(${ANAKIN_LIBRARY_DIR})

  add_definitions(-DPADDLE_WITH_ANAKIN)
endif()
2 changes: 1 addition & 1 deletion cmake/external/boost.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ set(BOOST_PROJECT "extern_boost")
# So we use 1.41.0 here.
set(BOOST_VER "1.41.0")
set(BOOST_TAR "boost_1_41_0" CACHE STRING "" FORCE)
set(BOOST_URL "http://paddlepaddledeps.cdn.bcebos.com/${BOOST_TAR}.tar.gz" CACHE STRING "" FORCE)
set(BOOST_URL "http://paddlepaddledeps.bj.bcebos.com/${BOOST_TAR}.tar.gz" CACHE STRING "" FORCE)

MESSAGE(STATUS "BOOST_TAR: ${BOOST_TAR}, BOOST_URL: ${BOOST_URL}")

Expand Down
2 changes: 1 addition & 1 deletion cmake/external/grpc.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ ExternalProject_Add(
# 3. keep only zlib, cares, protobuf, boringssl under "third_party",
# checkout and clean other dirs under third_party
# 4. remove .git, and package the directory.
URL "http://paddlepaddledeps.cdn.bcebos.com/grpc-v1.10.x.tar.gz"
URL "http://paddlepaddledeps.bj.bcebos.com/grpc-v1.10.x.tar.gz"
URL_MD5 "1f268a2aff6759839dccd256adcc91cf"
PREFIX ${GRPC_SOURCES_DIR}
UPDATE_COMMAND ""
Expand Down
20 changes: 14 additions & 6 deletions cmake/external/mkldnn.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,17 @@ IF(APPLE)
return()
ENDIF()

MESSAGE(STATUS "Set ${MKLDNN_INSTALL_DIR}/lib to runtime path")
# GNUInstallDirs introduces CMAKE_INSTALL_LIBDIR, which is inspected below to
# decide which library sub-directory name to use.
INCLUDE(GNUInstallDirs)
# LIBDIR is the library sub-directory name used for MKL-DNN paths: "lib" by
# default, "lib64" when CMAKE_INSTALL_LIBDIR ends in "lib64".
SET(LIBDIR "lib")
if(CMAKE_INSTALL_LIBDIR MATCHES ".*lib64$")
  SET(LIBDIR "lib64")
endif()

# Report the directory that is added to the runtime path. The previous text
# contained a stray "l" before ${LIBDIR} and printed ".../llib" or
# ".../llib64".
MESSAGE(STATUS "Set ${MKLDNN_INSTALL_DIR}/${LIBDIR} to runtime path")
SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLDNN_INSTALL_DIR}/lib")
SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLDNN_INSTALL_DIR}/${LIBDIR}")

INCLUDE_DIRECTORIES(${MKLDNN_INC_DIR}) # For MKLDNN code to include internal headers.

Expand All @@ -58,7 +66,7 @@ ExternalProject_Add(
${EXTERNAL_PROJECT_LOG_ARGS}
DEPENDS ${MKLDNN_DEPENDS}
GIT_REPOSITORY "https://github.com/intel/mkl-dnn.git"
GIT_TAG "830a10059a018cd2634d94195140cf2d8790a75a"
GIT_TAG "863ff6e7042cec7d2e29897fe9f0872e0888b0fc"
PREFIX ${MKLDNN_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
Expand All @@ -79,9 +87,9 @@ ExternalProject_Add(
-DMKLROOT:PATH=${MKLML_ROOT}
)
if(WIN32)
SET(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/lib/mkldnn.lib" CACHE FILEPATH "mkldnn library." FORCE)
SET(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/${LIBDIR}/mkldnn.lib" CACHE FILEPATH "mkldnn library." FORCE)
else(WIN32)
SET(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/lib/libmkldnn.so" CACHE FILEPATH "mkldnn library." FORCE)
SET(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/${LIBDIR}/libmkldnn.so" CACHE FILEPATH "mkldnn library." FORCE)
endif(WIN32)

ADD_LIBRARY(shared_mkldnn SHARED IMPORTED GLOBAL)
Expand All @@ -101,7 +109,7 @@ ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT})
# copy the real so.0 lib to install dir
# it can be directly contained in wheel or capi
if(WIN32)
SET(MKLDNN_SHARED_LIB ${MKLDNN_INSTALL_DIR}/lib/mkldnn.dll)
SET(MKLDNN_SHARED_LIB ${MKLDNN_INSTALL_DIR}/bin/mkldnn.dll)
else(WIN32)
SET(MKLDNN_SHARED_LIB ${MKLDNN_INSTALL_DIR}/libmkldnn.so.0)
ADD_CUSTOM_COMMAND(OUTPUT ${MKLDNN_SHARED_LIB}
Expand Down
4 changes: 2 additions & 2 deletions cmake/external/mklml.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLML_ROOT}/lib")
SET(TIME_VERSION "2019.0.1.20181227")
IF(WIN32)
SET(MKLML_VER "mklml_win_${TIME_VERSION}" CACHE STRING "" FORCE)
SET(MKLML_URL "https://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.zip" CACHE STRING "" FORCE)
SET(MKLML_URL "https://paddlepaddledeps.bj.bcebos.com/${MKLML_VER}.zip" CACHE STRING "" FORCE)
SET(MKLML_LIB ${MKLML_LIB_DIR}/mklml.lib)
SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5md.lib)
SET(MKLML_SHARED_LIB ${MKLML_LIB_DIR}/mklml.dll)
Expand All @@ -43,7 +43,7 @@ ELSE()
#TODO(intel-huying):
# Now enable Erf function in mklml library temporarily, it will be updated as offical version later.
SET(MKLML_VER "Glibc225_vsErf_mklml_lnx_${TIME_VERSION}" CACHE STRING "" FORCE)
SET(MKLML_URL "http://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE)
SET(MKLML_URL "http://paddlepaddledeps.bj.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE)
SET(MKLML_LIB ${MKLML_LIB_DIR}/libmklml_intel.so)
SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5.so)
SET(MKLML_SHARED_LIB ${MKLML_LIB_DIR}/libmklml_intel.so)
Expand Down
2 changes: 1 addition & 1 deletion cmake/operators.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ function(op_library TARGET)
# Define operators that don't need pybind here.
foreach(manual_pybind_op "compare_op" "logical_op" "nccl_op"
"tensor_array_read_write_op" "tensorrt_engine_op" "conv_fusion_op"
"fusion_transpose_flatten_concat_op" "fusion_conv_inception_op")
"fusion_transpose_flatten_concat_op" "fusion_conv_inception_op" "sync_batch_norm_op")
if ("${TARGET}" STREQUAL "${manual_pybind_op}")
set(pybind_flag 1)
endif()
Expand Down
1 change: 1 addition & 0 deletions cmake/tensorrt.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,6 @@ if(TENSORRT_FOUND)
message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. "
"Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ")
include_directories(${TENSORRT_INCLUDE_DIR})
link_directories(${TENSORRT_LIBRARY})
add_definitions(-DPADDLE_WITH_TENSORRT)
endif()
Loading