Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 21 additions & 71 deletions paddle/scripts/paddle_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ function cmake_gen() {
-DWITH_AVX=${WITH_AVX:-OFF}
-DWITH_GOLANG=${WITH_GOLANG:-OFF}
-DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All}
-DCUDA_ARCH_BIN=${CUDA_ARCH_BIN} \
-DWITH_PYTHON=${WITH_PYTHON:-ON}
-DCUDNN_ROOT=/usr/
-DWITH_TESTING=${WITH_TESTING:-ON}
Expand Down Expand Up @@ -228,6 +229,7 @@ EOF
-DWITH_AVX=${WITH_AVX:-OFF} \
-DWITH_GOLANG=${WITH_GOLANG:-OFF} \
-DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} \
-DCUDA_ARCH_BIN=${CUDA_ARCH_BIN} \
-DWITH_PYTHON=${WITH_PYTHON:-ON} \
-DCUDNN_ROOT=/usr/ \
-DWITH_TESTING=${WITH_TESTING:-ON} \
Expand Down Expand Up @@ -561,57 +563,6 @@ function bind_test() {
wait
}

function parallel_test() {
mkdir -p ${PADDLE_ROOT}/build
cd ${PADDLE_ROOT}/build
if [ ${WITH_TESTING:-ON} == "ON" ] ; then
cat <<EOF
========================================
Running unit tests ...
========================================
EOF

# calculate and set the memory usage for each process
# MEM_USAGE=$(printf "%.2f" `echo "scale=5; 1.0 / $NUM_PROC" | bc`)
# export FLAGS_fraction_of_gpu_memory_to_use=$MEM_USAGE

EXIT_CODE=0;
pids=()

# get the CUDA device count
CUDA_DEVICE_COUNT=$(nvidia-smi -L | wc -l)
# each test case would occupy two graph cards
NUM_PROC=$[CUDA_DEVICE_COUNT/2]
for (( i = 0; i < $NUM_PROC; i++ )); do
# CUDA_VISIBLE_DEVICES http://acceleware.com/blog/cudavisibledevices-masking-gpus
# ctest -I https://cmake.org/cmake/help/v3.0/manual/ctest.1.html?highlight=ctest
if [ ${TESTING_DEBUG_MODE:-OFF} == "ON" ] ; then
env CUDA_VISIBLE_DEVICES=$[i*2],$[i*2+1] ctest -I $i,,$NUM_PROC -V &
pids+=($!)
else
env CUDA_VISIBLE_DEVICES=$[i*2],$[i*2+1] ctest -I $i,,$NUM_PROC --output-on-failure &
pids+=($!)
fi
done

clen=`expr "${#pids[@]}" - 1` # get length of commands - 1
for i in `seq 0 "$clen"`; do
wait ${pids[$i]}
CODE=$?
if [[ "${CODE}" != "0" ]]; then
echo "At least one test failed with exit code => ${CODE}" ;
EXIT_CODE=1;
fi
done
wait; # wait for all subshells to finish

echo "EXIT_CODE => $EXIT_CODE"
if [[ "${EXIT_CODE}" != "0" ]]; then
exit "$EXIT_CODE"
fi
fi
}

EXIT_CODE=0;
function caught_error() {
for job in `jobs -p`; do
Expand Down Expand Up @@ -657,7 +608,6 @@ function card_test() {
cuda_list="$cuda_list,$[i*cardnumber+j]"
fi
done
# echo $cuda_list
if [ ${TESTING_DEBUG_MODE:-OFF} == "ON" ] ; then
if [[ $cardnumber == $CUDA_DEVICE_COUNT ]]; then
ctest -I $i,,$NUM_PROC -R "($testcases)" -V &
Expand All @@ -675,26 +625,27 @@ function card_test() {
done

wait; # wait for all subshells to finish
set +m
}

function aggresive_test() {
function parallel_test() {
mkdir -p ${PADDLE_ROOT}/build
cd ${PADDLE_ROOT}/build
if [ ${WITH_TESTING:-ON} == "ON" ] ; then
cat <<EOF
========================================
Running unit tests ...
Running unit tests in parallel way ...
========================================
EOF

set +x
EXIT_CODE=0;
test_cases=$(ctest -N -V)
exclusive_tests=''
single_card_tests=''
multiple_card_tests=''
is_exclusive=''
is_multicard=''
test_cases=$(ctest -N -V) # get all test cases
exclusive_tests='' # cases list which would be run exclusively
single_card_tests='' # cases list which would take one graph card
multiple_card_tests='' # cases list which would take multiple GPUs, most cases would be two GPUs
is_exclusive='' # indicate whether the case is exclusive type
is_multicard='' # indicate whether the case is multiple GPUs type
while read -r line; do
if [[ "$line" == "" ]]; then
continue
Expand All @@ -703,15 +654,15 @@ set +x
if [[ "$matchstr" == "" ]]; then
# Any test case with LABELS property would be parse here
# RUN_TYPE=EXCLUSIVE mean the case would run exclusively
# RUN_TYPE=DIST mean the case would take two graph cards during runtime
# RUN_TYPE=DIST mean the case would take two graph GPUs during runtime
read is_exclusive <<< $(echo "$line"|grep -oEi "RUN_TYPE=EXCLUSIVE")
read is_multicard <<< $(echo "$line"|grep -oEi "RUN_TYPE=DIST")
continue
fi
read testcase <<< $(echo "$line"|grep -oEi "\w+$")

if [[ "$is_multicard" == "" ]]; then
# trick: treat all test case with prefix "test_dist" as dist case, and would run on 2 cards
# trick: treat all test case with prefix "test_dist" as dist case, and would run on 2 GPUs
read is_multicard <<< $(echo "$testcase"|grep -oEi "test_dist")
fi

Expand Down Expand Up @@ -740,9 +691,9 @@ set +x
testcase=''
done <<< "$test_cases";

card_test "$single_card_tests" 1
card_test "$multiple_card_tests" 2
card_test "$exclusive_tests"
card_test "$single_card_tests" 1 # run cases with single GPU
card_test "$multiple_card_tests" 2 # run cases with two GPUs
card_test "$exclusive_tests" # run cases exclusively, in this cases would be run with 4/8 GPUs
if [[ "$EXIT_CODE" != "0" ]]; then
exit 1;
fi
Expand Down Expand Up @@ -942,7 +893,8 @@ EOF
if [[ "$1" != "" ]]; then
parallel_number=$1
fi
cmake .. -DWITH_DISTRIBUTE=OFF -DON_INFER=ON
cmake .. -DWITH_DISTRIBUTE=OFF -DON_INFER=ON -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-Auto} -DCUDA_ARCH_BIN=${CUDA_ARCH_BIN}

make -j ${parallel_number} fluid_lib_dist
make -j ${parallel_number} inference_lib_dist
}
Expand Down Expand Up @@ -994,7 +946,7 @@ function main() {
gen_dockerfile ${PYTHON_ABI:-""}
;;
test)
aggresive_test
parallel_test
;;
single_test)
single_test $2
Expand Down Expand Up @@ -1024,9 +976,7 @@ function main() {
cmake_gen ${PYTHON_ABI:-""}
build ${parallel_number}
assert_api_not_changed ${PYTHON_ABI:-""}
aggresive_test
gen_fluid_lib ${parallel_number}
test_fluid_lib
parallel_test
assert_api_spec_approvals
;;
cicheck_brpc)
Expand Down Expand Up @@ -1057,7 +1007,7 @@ function main() {
cicheck_py35)
cmake_gen ${PYTHON_ABI:-""}
build ${parallel_number}
aggresive_test
parallel_test
assert_api_not_changed ${PYTHON_ABI:-""}
;;
cmake_gen)
Expand Down