Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions benchmark/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@ paddle/rnn/imdb.pkl
caffe/image/logs
tensorflow/image/logs
tensorflow/rnn/logs
fluid/models/*.pyc
fluid/logs
fluid/nohup.out
31 changes: 17 additions & 14 deletions benchmark/fluid/fluid_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,7 @@ def parse_args():
parser.add_argument(
'--batch_size', type=int, default=32, help='The minibatch size.')
parser.add_argument(
'--learning_rate',
type=float,
default=0.001,
help='The minibatch size.')
'--learning_rate', type=float, default=0.001, help='The learning rate.')
# TODO(wuyi): add "--use_fake_data" option back.
parser.add_argument(
'--skip_batch_num',
Expand Down Expand Up @@ -72,6 +69,11 @@ def parse_args():
type=int,
default=1,
help='If gpus > 1, will use ParallelExecutor to run, else use Executor.')
parser.add_argument(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did not see changes to deal with --cpus?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. Remove it and will add in next PR when using ParallelDo to do multi-thread CPU training.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks!

'--cpus',
type=int,
default=1,
help='If cpus > 1, will use ParallelDo to run, else use Executor.')
parser.add_argument(
'--data_set',
type=str,
Expand Down Expand Up @@ -231,10 +233,7 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc,
train_losses.append(loss)
print("Pass: %d, Iter: %d, Loss: %f\n" %
(pass_id, iters, np.mean(train_losses)))
train_elapsed = time.time() - start_time
examples_per_sec = num_samples / train_elapsed
print('\nTotal examples: %d, total time: %.5f, %.5f examples/sec\n' %
(num_samples, train_elapsed, examples_per_sec))
print_train_time(start_time, time.time(), num_samples)
print("Pass: %d, Loss: %f" % (pass_id, np.mean(train_losses)))
# evaluation
if not args.no_test and batch_acc != None:
Expand Down Expand Up @@ -315,10 +314,7 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
if batch_id % 1 == 0:
print("Pass %d, batch %d, loss %s" %
(pass_id, batch_id, np.array(loss)))
train_elapsed = time.time() - start_time
examples_per_sec = num_samples / train_elapsed
print('\nTotal examples: %d, total time: %.5f, %.5f examples/sed\n' %
(num_samples, train_elapsed, examples_per_sec))
print_train_time(start_time, time.time(), num_samples)
if not args.no_test and batch_acc != None:
test_acc = test(startup_exe, infer_prog, test_reader, feeder,
batch_acc)
Expand All @@ -329,20 +325,27 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
def print_arguments(args):
vars(args)['use_nvprof'] = (vars(args)['use_nvprof'] and
vars(args)['device'] == 'GPU')
print('----------- resnet Configuration Arguments -----------')
print('----------- Configuration Arguments -----------')
for arg, value in sorted(vars(args).iteritems()):
print('%s: %s' % (arg, value))
print('------------------------------------------------')


def print_train_time(start_time, end_time, num_samples):
train_elapsed = end_time - start_time
examples_per_sec = num_samples / train_elapsed
print('\nTotal examples: %d, total time: %.5f, %.5f examples/sed\n' %
(num_samples, train_elapsed, examples_per_sec))


def main():
args = parse_args()
print_arguments(args)

# the unique trainer id, starting from 0, needed by trainer
# only
nccl_id_var, num_trainers, trainer_id = (
None, 1, int(os.getenv("PADDLE_TRAINER_ID", "-1")))
None, 1, int(os.getenv("PADDLE_TRAINER_ID", "0")))

if args.use_cprof:
pr = cProfile.Profile()
Expand Down
20 changes: 11 additions & 9 deletions benchmark/fluid/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# This script benchmarking the PaddlePaddle Fluid on
# single thread single GPU.

mkdir -p logs
#export FLAGS_fraction_of_gpu_memory_to_use=0.0
export CUDNN_PATH=/paddle/cudnn_v5

Expand Down Expand Up @@ -35,6 +36,7 @@ nohup stdbuf -oL nvidia-smi \
--format=csv \
--filename=mem.log \
-l 1 &

# mnist
# mnist gpu mnist 128
FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
Expand All @@ -43,7 +45,7 @@ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
--batch_size=128 \
--skip_batch_num=5 \
--iterations=500 \
2>&1 | tee -a mnist_gpu_128.log
2>&1 | tee -a logs/mnist_gpu_128.log

# vgg16
# gpu cifar10 128
Expand All @@ -53,7 +55,7 @@ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
--batch_size=128 \
--skip_batch_num=5 \
--iterations=30 \
2>&1 | tee -a vgg16_gpu_128.log
2>&1 | tee -a logs/vgg16_gpu_128.log

# flowers gpu 128
FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
Expand All @@ -63,28 +65,28 @@ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
--data_set=flowers \
--skip_batch_num=5 \
--iterations=30 \
2>&1 | tee -a vgg16_gpu_flowers_32.log
2>&1 | tee -a logs/vgg16_gpu_flowers_32.log

# resnet50
# resnet50 gpu cifar10 128
FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
--model=resnet50 \
--model=resnet \
--device=GPU \
--batch_size=128 \
--data_set=cifar10 \
--skip_batch_num=5 \
--iterations=30 \
2>&1 | tee -a resnet50_gpu_128.log
2>&1 | tee -a logs/resnet50_gpu_128.log

# resnet50 gpu flowers 64
FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
--model=resnet50 \
--model=resnet \
--device=GPU \
--batch_size=64 \
--data_set=flowers \
--skip_batch_num=5 \
--iterations=30 \
2>&1 | tee -a resnet50_gpu_flowers_64.log
2>&1 | tee -a logs/resnet50_gpu_flowers_64.log

# lstm
# lstm gpu imdb 32 # tensorflow only support batch=32
Expand All @@ -94,7 +96,7 @@ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
--batch_size=32 \
--skip_batch_num=5 \
--iterations=30 \
2>&1 | tee -a lstm_gpu_32.log
2>&1 | tee -a logs/lstm_gpu_32.log

# seq2seq
# seq2seq gpu wmb 128
Expand All @@ -104,4 +106,4 @@ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
--batch_size=128 \
--skip_batch_num=5 \
--iterations=30 \
2>&1 | tee -a lstm_gpu_128.log
2>&1 | tee -a logs/lstm_gpu_128.log