Skip to content

Commit 655f769

Browse files
committed
refine tf mnist
1 parent daa70c1 commit 655f769

File tree

1 file changed

+122
-126
lines changed

1 file changed

+122
-126
lines changed

tensorflow/mnist.py

Lines changed: 122 additions & 126 deletions
Original file line numberDiff line numberDiff line change
@@ -10,140 +10,136 @@
1010
import paddle.v2 as paddle
1111
import paddle.v2.fluid as fluid
1212

13-
BATCH_SIZE = 128
14-
PASS_NUM = 5
15-
SEED = 1
1613
DTYPE = tf.float32
1714

1815

19-
def normal_scale(size, channels):
20-
scale = (2.0 / (size**2 * channels))**0.5
21-
return scale
22-
23-
24-
# NOTE(dzhwinter): TensorFlow uses the Philox random algorithm
25-
# as its normal generator; fetch paddle's random values for comparison
26-
def paddle_random_normal(shape, loc=.0, scale=1., seed=1, dtype="float32"):
27-
program = fluid.framework.Program()
28-
block = program.global_block()
29-
w = block.create_var(
30-
dtype="float32",
31-
shape=shape,
32-
lod_level=0,
33-
name="param",
34-
initializer=fluid.initializer.NormalInitializer(
35-
loc=.0, scale=scale, seed=seed))
36-
place = fluid.CPUPlace()
37-
exe = fluid.Executor(place)
38-
out = exe.run(program, fetch_list=[w])
39-
return np.array(out[0])
40-
41-
42-
train_reader = paddle.batch(paddle.dataset.mnist.train(), batch_size=BATCH_SIZE)
43-
images = tf.placeholder(DTYPE, shape=(None, 28, 28, 1))
44-
labels = tf.placeholder(tf.int64, shape=(None, ))
45-
46-
# conv layer
47-
arg = tf.convert_to_tensor(
48-
np.transpose(
49-
paddle_random_normal(
50-
[20, 1, 5, 5], scale=normal_scale(5, 1), seed=SEED, dtype=DTYPE),
51-
axes=[2, 3, 1, 0]))
52-
conv1_weights = tf.Variable(arg)
53-
conv1_bias = tf.Variable(tf.zeros([20]), dtype=DTYPE)
54-
conv1 = tf.nn.conv2d(
55-
images, conv1_weights, strides=[1, 1, 1, 1], padding="VALID")
56-
relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_bias))
57-
pool1 = tf.nn.max_pool(
58-
relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")
59-
60-
arg = tf.convert_to_tensor(
61-
np.transpose(
62-
paddle_random_normal(
63-
[50, 20, 5, 5], scale=normal_scale(5, 20), seed=SEED, dtype=DTYPE),
64-
axes=[2, 3, 1, 0]))
65-
conv2_weights = tf.Variable(arg)
66-
conv2_bias = tf.Variable(tf.zeros([50]), dtype=DTYPE)
67-
conv2 = tf.nn.conv2d(
68-
pool1, conv2_weights, strides=[1, 1, 1, 1], padding="VALID")
69-
relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_bias))
70-
pool2 = tf.nn.max_pool(
71-
relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")
72-
73-
pool_shape = pool2.get_shape().as_list()
74-
hidden_dim = reduce(lambda a, b: a * b, pool_shape[1:], 1)
75-
reshape = tf.reshape(pool2, shape=(tf.shape(pool2)[0], hidden_dim))
76-
77-
# fc layer
78-
# NOTE(dzhwinter): paddle uses an NCHW data format, tensorflow uses an NHWC data format
79-
# need to convert the fc weight
80-
paddle_weight = paddle_random_normal(
81-
[hidden_dim, 10],
82-
scale=normal_scale(hidden_dim, 10),
83-
seed=SEED,
84-
dtype=DTYPE)
85-
new_shape = pool_shape[-1:] + pool_shape[1:-1] + [10]
86-
paddle_weight = np.reshape(paddle_weight, new_shape)
87-
paddle_weight = np.transpose(paddle_weight, [1, 2, 0, 3])
88-
89-
arg = tf.convert_to_tensor(np.reshape(paddle_weight, [hidden_dim, 10]))
90-
fc_weights = tf.Variable(arg, dtype=DTYPE)
91-
fc_bias = tf.Variable(tf.zeros([10]), dtype=DTYPE)
92-
logits = tf.matmul(reshape, fc_weights) + fc_bias
93-
94-
# cross entropy
95-
96-
prediction = tf.nn.softmax(logits)
97-
98-
one_hot_labels = tf.one_hot(labels, depth=10)
99-
cost = -tf.reduce_sum(tf.log(prediction) * one_hot_labels, [1])
100-
avg_cost = tf.reduce_mean(cost)
101-
102-
correct = tf.equal(tf.argmax(prediction, 1), labels)
103-
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
104-
g_accuracy = tf.metrics.accuracy(labels, tf.argmax(prediction, axis=1))
105-
106-
opt = tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.9, beta2=0.999)
107-
train_op = opt.minimize(avg_cost)
108-
109-
110-
def eval_test():
16+
def parse_args(argv=None):
    """Parse command-line arguments for the MNIST benchmark.

    Args:
        argv: Optional list of argument strings. Defaults to ``None``,
            which makes argparse read ``sys.argv[1:]`` — backward
            compatible with the original zero-argument call.

    Returns:
        argparse.Namespace with ``batch_size``, ``iterations``,
        ``pass_num`` and ``device`` attributes.
    """
    parser = argparse.ArgumentParser("mnist model benchmark.")
    parser.add_argument(
        '--batch_size', type=int, default=128, help='The minibatch size.')
    parser.add_argument(
        '--iterations', type=int, default=35, help='The number of minibatches.')
    parser.add_argument(
        '--pass_num', type=int, default=5, help='The number of passes.')
    parser.add_argument(
        '--device',
        type=str,
        default='GPU',
        choices=['CPU', 'GPU'],
        help='The device type.')
    args = parser.parse_args(argv)
    return args
32+
33+
34+
def run_benchmark(args):
    """Train a LeNet-style CNN on MNIST with TensorFlow and report timings.

    Builds the graph on the device selected by ``args.device``, then runs
    ``args.pass_num`` passes over paddle's MNIST train set, printing
    per-batch loss/error/elapsed time and a per-pass accuracy summary.

    NOTE(review): ``args.iterations`` is parsed but currently unused; each
    pass iterates the full training set — confirm whether a cap was intended.
    """

    def weight_variable(dtype, shape):
        # Truncated normal, stddev=0.1 — the usual TF init for conv/fc weights.
        initial = tf.truncated_normal(shape, stddev=0.1, dtype=dtype)
        return tf.Variable(initial)

    def bias_variable(dtype, shape):
        # Small positive constant bias keeps ReLUs active early in training.
        initial = tf.constant(0.1, shape=shape, dtype=dtype)
        return tf.Variable(initial)

    device = '/cpu:0' if args.device == 'CPU' else '/device:GPU:0'
    with tf.device(device):
        images = tf.placeholder(DTYPE, shape=(None, 28, 28, 1))
        labels = tf.placeholder(tf.int64, shape=(None, ))

        # conv1: 5x5 kernel, 1 -> 20 channels, then 2x2 max-pool.
        conv1_weights = weight_variable(DTYPE, [5, 5, 1, 20])
        conv1_bias = bias_variable(DTYPE, [20])
        conv1 = tf.nn.conv2d(
            images, conv1_weights, strides=[1, 1, 1, 1], padding="VALID")
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_bias))
        pool1 = tf.nn.max_pool(
            relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")

        # conv2: 5x5 kernel, 20 -> 50 channels, then 2x2 max-pool.
        conv2_weights = weight_variable(DTYPE, [5, 5, 20, 50])
        conv2_bias = bias_variable(DTYPE, [50])
        conv2 = tf.nn.conv2d(
            pool1, conv2_weights, strides=[1, 1, 1, 1], padding="VALID")
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_bias))
        pool2 = tf.nn.max_pool(
            relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")

        # Flatten for the fully connected layer: hidden_dim is the product
        # of the static H*W*C dims; the batch dim stays dynamic.
        pool_shape = pool2.get_shape().as_list()
        hidden_dim = reduce(lambda a, b: a * b, pool_shape[1:], 1)
        reshape = tf.reshape(pool2, shape=(tf.shape(pool2)[0], hidden_dim))

        fc_weights = weight_variable(DTYPE, [hidden_dim, 10])
        fc_bias = bias_variable(DTYPE, [10])
        logits = tf.matmul(reshape, fc_weights) + fc_bias
        prediction = tf.nn.softmax(logits)

        one_hot_labels = tf.one_hot(labels, depth=10)
        # FIX: compute cross entropy from the logits instead of
        # -sum(one_hot * log(softmax)); log(prediction) yields -inf/NaN as
        # soon as any softmax probability underflows to 0.
        cost = tf.nn.softmax_cross_entropy_with_logits(
            labels=one_hot_labels, logits=logits)
        avg_cost = tf.reduce_mean(cost)

        correct = tf.equal(tf.argmax(prediction, 1), labels)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
        # Streaming metric: returns (value, update_op), so each sess.run of
        # it both updates and reads the running accuracy.
        # NOTE(review): the same streaming counter accumulates over train
        # AND test batches — confirm that mixing is intended.
        g_accuracy = tf.metrics.accuracy(labels, tf.argmax(prediction, axis=1))

        opt = tf.train.AdamOptimizer(
            learning_rate=0.001, beta1=0.9, beta2=0.999)
        train_op = opt.minimize(avg_cost)

    train_reader = paddle.batch(
        paddle.dataset.mnist.train(), batch_size=args.batch_size)
    test_reader = paddle.batch(
        paddle.dataset.mnist.test(), batch_size=args.batch_size)

    def eval_test():
        """Run one pass over the test set; return the streaming accuracy."""
        for batch_id, data in enumerate(test_reader()):
            images_data = np.array(
                map(lambda x: np.transpose(x[0].reshape([1, 28, 28]), axes=[1,2,0]), data)).astype("float32")
            labels_data = np.array(map(lambda x: x[1], data)).astype("int64")
            # FIX: do not run train_op during evaluation — the original
            # executed the optimizer on test batches, training on test data.
            loss, acc, g_acc = sess.run(
                [avg_cost, accuracy, g_accuracy],
                feed_dict={images: images_data,
                           labels: labels_data})
        return g_acc[1]

    config = tf.ConfigProto(
        intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
    with tf.Session(config=config) as sess:
        init_g = tf.global_variables_initializer()
        # Local-variable init is required for the tf.metrics counters.
        init_l = tf.local_variables_initializer()
        sess.run(init_g)
        sess.run(init_l)
        for pass_id in range(args.pass_num):
            pass_start = time.time()
            for batch_id, data in enumerate(train_reader()):
                # paddle yields (image, label) pairs with flat images;
                # reshape/transpose to NHWC (28, 28, 1) for conv2d.
                images_data = np.array(
                    map(lambda x: np.transpose(x[0].reshape([1, 28, 28]), axes=[1,2,0]), data)).astype("float32")
                labels_data = np.array(map(lambda x: x[1], data)).astype(
                    "int64")
                start = time.time()
                _, loss, acc, g_acc = sess.run(
                    [train_op, avg_cost, accuracy, g_accuracy],
                    feed_dict={images: images_data,
                               labels: labels_data})
                end = time.time()

                # FIX: time.time() differences are already in seconds; the
                # original divided by 1000 and under-reported elapsed time.
                print("pass=%d, batch=%d, loss=%f, error=%f, elapse=%f" %
                      (pass_id, batch_id, loss, 1 - acc, end - start))
            pass_end = time.time()
            test_avg_acc = eval_test()
            print(
                "pass=%d, training_avg_accuracy=%f, test_avg_acc=%f, elapse=%f"
                % (pass_id, g_acc[1], test_avg_acc,
                   pass_end - pass_start))
133+
134+
135+
def print_arguments(args):
    """Pretty-print the parsed arguments, one ``name: value`` per line."""
    print('----------- Configuration Arguments -----------')
    # FIX: items() instead of Python-2-only iteritems(); on Py2 the list
    # copy of a handful of args is negligible, and the code also runs on Py3.
    for arg, value in sorted(vars(args).items()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')
140+
141+
142+
def main():
    """Entry point: parse CLI arguments, echo them, run the benchmark."""
    args = parse_args()
    print_arguments(args)
    run_benchmark(args)


if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)