Skip to content

Paddle V4 API - Word to Vec #10214

@helinwang

Description

@helinwang

API design: #10152

import paddle.fluid as fluid
import paddle

# Forwarded to the Word2Vec constructor, which passes it on as the
# `is_sparse` argument of every embedding layer.
IS_SPARSE = True
# Number of samples grouped into one batch by `paddle.batch`.
BATCH_SIZE = 32
# Number of full passes made over the training reader.
NUM_EPOCHS = 3
# Second argument to `paddle.dataset.imikolov.train`; presumably the n-gram
# length (four context words plus the word to predict) -- confirm.
N = 5


class Word2Vec(fluid.Program):
    """N-gram language model that predicts a word from four preceding words.

    Each context word is looked up in an embedding table, the four embeddings
    are concatenated, passed through a sigmoid hidden layer and then through
    a softmax layer of width ``dict_size``.
    """

    # Width of each word embedding vector.
    EMBED_SIZE = 32
    # Width of the hidden fully connected layer.
    HIDDEN_SIZE = 256

    def __init__(self, dict_size, is_sparse):
        """Store the model hyper-parameters.

        Args:
            dict_size: Number of entries in the word dictionary; used as the
                embedding table height and as the output layer width.
            is_sparse: Forwarded to ``fluid.layers.embedding`` as
                ``is_sparse``.
        """
        # NOTE(review): assumes fluid.Program.__init__ takes no arguments --
        # confirm against the final API design.
        super(Word2Vec, self).__init__()
        self.dict_size = dict_size
        self.is_sparse = is_sparse

    def predict(self, first_word, second_word, third_word, forth_word):
        """Build the forward network for four context words.

        Args:
            first_word: Data layer holding the first context word.
            second_word: Data layer holding the second context word.
            third_word: Data layer holding the third context word.
            forth_word: Data layer holding the fourth context word.

        Returns:
            The output of the final softmax layer (width ``dict_size``).
        """
        embeds = []
        for word in (first_word, second_word, third_word, forth_word):
            # Every embedding is created under the same scope name so that
            # all four lookups share the same parameter.
            with fluid.var_scope("shared_embedding"):
                embed = fluid.layers.embedding(
                    input=word,
                    size=[self.dict_size, self.EMBED_SIZE],
                    dtype='float32',
                    is_sparse=self.is_sparse)
            embeds.append(embed)

        concat_embed = fluid.layers.concat(input=embeds, axis=1)
        hidden1 = fluid.layers.fc(input=concat_embed,
                                  size=self.HIDDEN_SIZE,
                                  act='sigmoid')
        predict_word = fluid.layers.fc(input=hidden1,
                                       size=self.dict_size,
                                       act='softmax')
        return predict_word

    # NOTE(review): `network` is neither imported nor defined in the visible
    # code; presumably it is provided by the proposed API (#10152) -- confirm
    # how it is brought into scope.
    @network("firstw", "secondw", "thirdw", "forthw", "nextw")
    def train_step(self):
        """Build the training network and return the mean cross-entropy cost.

        Declares the four context-word inputs and the target word input,
        runs ``predict``, and attaches an SGD optimizer (learning rate 0.001)
        that minimizes the average cost.
        """
        first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64')
        second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64')
        third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64')
        forth_word = fluid.layers.data(name='forthw', shape=[1], dtype='int64')
        next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
        # Same scope name as in `infer`; presumably this is what lets both
        # networks share parameters -- confirm.
        with fluid.var_scope("predict"):
            predict_word = self.predict(
                first_word, second_word, third_word, forth_word)
        cost = fluid.layers.cross_entropy(input=predict_word, label=next_word)
        avg_cost = fluid.layers.mean(cost)
        sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
        sgd_optimizer.minimize(avg_cost)
        return avg_cost

    @network("firstw", "secondw", "thirdw", "forthw")
    def infer(self):
        """Build the inference network and return the predicted-word layer.

        Declares the four context-word inputs and runs ``predict`` on them;
        no cost or optimizer is attached.
        """
        first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64')
        second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64')
        third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64')
        forth_word = fluid.layers.data(name='forthw', shape=[1], dtype='int64')
        with fluid.var_scope("predict"):
            predict_word = self.predict(
                first_word, second_word, third_word, forth_word)
        return predict_word

# Training and inference begin here.
# Build the word dictionary, compile the model, and wrap the imikolov
# training reader so that it yields batches of BATCH_SIZE samples.
word_dict = paddle.dataset.imikolov.build_dict()
word2vec = Word2Vec(len(word_dict), IS_SPARSE).Compile()
train_reader = paddle.batch(
    paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE)

# Run one training step per batch for NUM_EPOCHS passes over the reader.
for epoch_id in range(NUM_EPOCHS):
    for data in train_reader():
        # NOTE(review): `data` is one batch produced by `paddle.batch`;
        # confirm that data[0]..data[4] are the five word inputs expected by
        # `train_step` rather than the first five samples of the batch.
        avg_cost = word2vec.train_step(data[0], data[1], data[2], data[3], data[4])

# Run inference on four literal word ids.
next_word = word2vec.infer(1,2,3,4)

# Save the parameters of the inference network.
fluid.save_parameters(word2vec.infer, "./infer_params")

# Example of loading the inference parameters into a fresh model.
# `Compile` initializes the parameters, but they are then overridden by
# `fluid.load_parameters`.
word2vec_new = Word2Vec(len(word_dict), IS_SPARSE).Compile()
fluid.load_parameters(word2vec_new.infer, "./infer_params")
next_word = word2vec_new.infer(1,2,3,4)

Metadata

Labels

No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions