Skip to content
This repository was archived by the owner on Jan 24, 2024. It is now read-only.

Commit 928d178

Browse files
authored
Chap 4 word2vec: polish chapter content (#541)
1 parent 1b3bb17 commit 928d178

File tree

3 files changed

+55
-42
lines changed

3 files changed

+55
-42
lines changed

04.word2vec/README.md

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,9 @@ def train(use_cuda, train_program, params_dirname):
329329
if event.step % 10 == 0:
330330
print "Step %d: Average Cost %f" % (event.step, avg_cost)
331331

332-
# If average cost is lower than 5.0, we consider the model good enough to stop.
332+
# If average cost is lower than 5.8, we consider the model good enough to stop.
333+
# Note 5.8 is a relatively high value. In order to get a better model, one should
334+
# aim for avg_cost lower than 3.5. But the training could take a longer time.
333335
if avg_cost < 5.8:
334336
trainer.save_params(params_dirname)
335337
trainer.stop()
@@ -383,16 +385,17 @@ def infer(use_cuda, inference_program, params_dirname=None):
383385
# detail (lod) info of each LoDtensor should be [[1]] meaning there is only
384386
# one lod_level and there is only one sequence of one word on this level.
385387
# Note that lod info should be a list of lists.
386-
lod1 = [[211]] # 'among'
387-
lod2 = [[6]] # 'a'
388-
lod3 = [[96]] # 'group'
389-
lod4 = [[4]] # 'of'
390-
base_shape = [1]
391388

392-
first_word = fluid.create_lod_tensor(lod1, base_shape, place)
393-
second_word = fluid.create_lod_tensor(lod2, base_shape, place)
394-
third_word = fluid.create_lod_tensor(lod3, base_shape, place)
395-
fourth_word = fluid.create_lod_tensor(lod4, base_shape, place)
389+
data1 = [[211]] # 'among'
390+
data2 = [[6]] # 'a'
391+
data3 = [[96]] # 'group'
392+
data4 = [[4]] # 'of'
393+
lod = [[1]]
394+
395+
first_word = fluid.create_lod_tensor(data1, lod, place)
396+
second_word = fluid.create_lod_tensor(data2, lod, place)
397+
third_word = fluid.create_lod_tensor(data3, lod, place)
398+
fourth_word = fluid.create_lod_tensor(data4, lod, place)
396399

397400
result = inferencer.infer(
398401
{
@@ -406,16 +409,18 @@ def infer(use_cuda, inference_program, params_dirname=None):
406409
print(numpy.array(result[0]))
407410
most_possible_word_index = numpy.argmax(result[0])
408411
print(most_possible_word_index)
409-
print([key for key, value in word_dict.iteritems() if value == most_possible_word_index][0])
412+
print([
413+
key for key, value in word_dict.iteritems()
414+
if value == most_possible_word_index
415+
][0])
410416
```
411417

412-
When we spent 30 mins in training, the output is like below, which means the next word for `among a group of` is `unknown`. After several hours training, it gives a meaningful prediction as `workers`.
418+
After training for 3 minutes, the output looks like the one below, which means the next word for `among a group of` is `a`. If we train the model for a longer time, it will give a meaningful prediction of `workers`.
413419

414420
```text
415-
[[4.0056456e-02 5.4810006e-02 5.3107393e-05 ... 1.0061498e-04
416-
8.9233123e-05 1.5757295e-01]]
417-
2072
418-
<unk>
421+
[[0.00106646 0.0007907 0.00072041 ... 0.00049024 0.00041355 0.00084464]]
422+
6
423+
a
419424
```
420425

421426
The main entrance of the program is fairly simple:

04.word2vec/index.html

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,9 @@
371371
if event.step % 10 == 0:
372372
print "Step %d: Average Cost %f" % (event.step, avg_cost)
373373

374-
# If average cost is lower than 5.0, we consider the model good enough to stop.
374+
# If average cost is lower than 5.8, we consider the model good enough to stop.
375+
# Note 5.8 is a relatively high value. In order to get a better model, one should
376+
# aim for avg_cost lower than 3.5. But the training could take a longer time.
375377
if avg_cost < 5.8:
376378
trainer.save_params(params_dirname)
377379
trainer.stop()
@@ -425,16 +427,17 @@
425427
# detail (lod) info of each LoDtensor should be [[1]] meaning there is only
426428
# one lod_level and there is only one sequence of one word on this level.
427429
# Note that lod info should be a list of lists.
428-
lod1 = [[211]] # 'among'
429-
lod2 = [[6]] # 'a'
430-
lod3 = [[96]] # 'group'
431-
lod4 = [[4]] # 'of'
432-
base_shape = [1]
433430

434-
first_word = fluid.create_lod_tensor(lod1, base_shape, place)
435-
second_word = fluid.create_lod_tensor(lod2, base_shape, place)
436-
third_word = fluid.create_lod_tensor(lod3, base_shape, place)
437-
fourth_word = fluid.create_lod_tensor(lod4, base_shape, place)
431+
data1 = [[211]] # 'among'
432+
data2 = [[6]] # 'a'
433+
data3 = [[96]] # 'group'
434+
data4 = [[4]] # 'of'
435+
lod = [[1]]
436+
437+
first_word = fluid.create_lod_tensor(data1, lod, place)
438+
second_word = fluid.create_lod_tensor(data2, lod, place)
439+
third_word = fluid.create_lod_tensor(data3, lod, place)
440+
fourth_word = fluid.create_lod_tensor(data4, lod, place)
438441

439442
result = inferencer.infer(
440443
{
@@ -448,16 +451,18 @@
448451
print(numpy.array(result[0]))
449452
most_possible_word_index = numpy.argmax(result[0])
450453
print(most_possible_word_index)
451-
print([key for key, value in word_dict.iteritems() if value == most_possible_word_index][0])
454+
print([
455+
key for key, value in word_dict.iteritems()
456+
if value == most_possible_word_index
457+
][0])
452458
```
453459

454-
When we spent 30 mins in training, the output is like below, which means the next word for `among a group of` is `unknown`. After several hours training, it gives a meaningful prediction as `workers`.
460+
After training for 3 minutes, the output looks like the one below, which means the next word for `among a group of` is `board`. If we train the model for a longer time, it will give a meaningful prediction of `workers`.
455461

456462
```text
457-
[[4.0056456e-02 5.4810006e-02 5.3107393e-05 ... 1.0061498e-04
458-
8.9233123e-05 1.5757295e-01]]
459-
2072
460-
<unk>
463+
[[0.00144043 0.00073983 0.00042264 ... 0.00061815 0.00038701 0.00099838]]
464+
142
465+
board
461466
```
462467

463468
The main entrance of the program is fairly simple:

04.word2vec/train.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,9 @@ def event_handler(event):
107107
if event.step % 10 == 0:
108108
print "Step %d: Average Cost %f" % (event.step, avg_cost)
109109

110+
# If average cost is lower than 5.8, we consider the model good enough to stop.
111+
# Note 5.8 is a relatively high value. In order to get a better model, one should
112+
# aim for avg_cost lower than 3.5. But the training could take a longer time.
110113
if avg_cost < 5.8:
111114
trainer.save_params(params_dirname)
112115
trainer.stop()
@@ -138,17 +141,17 @@ def infer(use_cuda, inference_program, params_dirname=None):
138141
# detail (lod) info of each LoDtensor should be [[1]] meaning there is only
139142
# one lod_level and there is only one sequence of one word on this level.
140143
# Note that lod info should be a list of lists.
144+
145+
data1 = [[211]] # 'among'
146+
data2 = [[6]] # 'a'
147+
data3 = [[96]] # 'group'
148+
data4 = [[4]] # 'of'
141149
lod = [[1]]
142-
base_shape = [1]
143-
# The range of random integers is [low, high]
144-
first_word = fluid.create_random_int_lodtensor(
145-
lod, base_shape, place, low=0, high=dict_size - 1)
146-
second_word = fluid.create_random_int_lodtensor(
147-
lod, base_shape, place, low=0, high=dict_size - 1)
148-
third_word = fluid.create_random_int_lodtensor(
149-
lod, base_shape, place, low=0, high=dict_size - 1)
150-
fourth_word = fluid.create_random_int_lodtensor(
151-
lod, base_shape, place, low=0, high=dict_size - 1)
150+
151+
first_word = fluid.create_lod_tensor(data1, lod, place)
152+
second_word = fluid.create_lod_tensor(data2, lod, place)
153+
third_word = fluid.create_lod_tensor(data3, lod, place)
154+
fourth_word = fluid.create_lod_tensor(data4, lod, place)
152155

153156
result = inferencer.infer(
154157
{

0 commit comments

Comments
 (0)