Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions paddle/fluid/API.spec
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ paddle.fluid.ParallelExecutor.run (ArgSpec(args=['self', 'fetch_list', 'feed', '
paddle.fluid.create_lod_tensor (ArgSpec(args=['data', 'recursive_seq_lens', 'place'], varargs=None, keywords=None, defaults=None), ('document', 'b82ea20e2dc5ff2372e0643169ca47ff'))
paddle.fluid.create_random_int_lodtensor (ArgSpec(args=['recursive_seq_lens', 'base_shape', 'place', 'low', 'high'], varargs=None, keywords=None, defaults=None), ('document', '74dc6d23185d90a7a50fbac19f5b65fb'))
paddle.fluid.DataFeedDesc.__init__ (ArgSpec(args=['self', 'proto_file'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.DataFeedDesc.desc (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '4294493e31c4bc9fc4bd48753044235f'))
paddle.fluid.DataFeedDesc.set_batch_size (ArgSpec(args=['self', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', '8d9f44601e0a99dd431f14fd9250cd21'))
paddle.fluid.DataFeedDesc.set_dense_slots (ArgSpec(args=['self', 'dense_slots_name'], varargs=None, keywords=None, defaults=None), ('document', 'eb894b464bbcd1b4bc8038398954f766'))
paddle.fluid.DataFeedDesc.set_use_slots (ArgSpec(args=['self', 'use_slots_name'], varargs=None, keywords=None, defaults=None), ('document', '415c56600ce4e198c071cad01409a690'))
paddle.fluid.DataFeedDesc.desc (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '75283b5f03ec7b6f74bfca9881a37428'))
paddle.fluid.DataFeedDesc.set_batch_size (ArgSpec(args=['self', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', '68df53d3ea0f24063bf7689e82c2b82e'))
paddle.fluid.DataFeedDesc.set_dense_slots (ArgSpec(args=['self', 'dense_slots_name'], varargs=None, keywords=None, defaults=None), ('document', 'd5a78553cd94fe64148399797055d8ad'))
paddle.fluid.DataFeedDesc.set_use_slots (ArgSpec(args=['self', 'use_slots_name'], varargs=None, keywords=None, defaults=None), ('document', '88d229ea9f892ce8d2922cf028c8bb3a'))
paddle.fluid.CompiledProgram.__init__ (ArgSpec(args=['self', 'program_or_graph'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.CompiledProgram.with_data_parallel (ArgSpec(args=['self', 'loss_name', 'build_strategy', 'exec_strategy', 'share_vars_from', 'places'], varargs=None, keywords=None, defaults=(None, None, None, None, None)), ('document', '0e17773521634ef798fddd7d2ea3ef96'))
paddle.fluid.CompiledProgram.with_inference_optimize (ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=None), ('document', '9e5b009d850191a010e859189c127fd8'))
Expand Down
157 changes: 124 additions & 33 deletions python/paddle/fluid/data_feed_desc.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,28 +24,32 @@ class DataFeedDesc(object):
currently only used for AsyncExecutor (See comments for class AsyncExecutor
for a brief introduction)

DataFeedDesc shall be initialized from a valid protobuf message from disk:
>>> data_feed = fluid.DataFeedDesc('data.proto')
DataFeedDesc shall be initialized from a valid protobuf message from disk.

See :code:`paddle/fluid/framework/data_feed.proto` for message definition.
A typical message might look like:

>>> name: "MultiSlotDataFeed"
>>> batch_size: 2
>>> multi_slot_desc {
>>> slots {
>>> name: "words"
>>> type: "uint64"
>>> is_dense: false
>>> is_used: true
>>> }
>>> slots {
>>> name: "label"
>>> type: "uint64"
>>> is_dense: false
>>> is_used: true
>>> }
>>> }
.. code-block:: python

f = open("data.proto", "w")
print >> f, 'name: "MultiSlotDataFeed"'
print >> f, 'batch_size: 2'
print >> f, 'multi_slot_desc {'
print >> f, ' slots {'
print >> f, ' name: "words"'
print >> f, ' type: "uint64"'
print >> f, ' is_dense: false'
print >> f, ' is_used: true'
print >> f, ' }'
print >> f, ' slots {'
print >> f, ' name: "label"'
print >> f, ' type: "uint64"'
print >> f, ' is_dense: false'
print >> f, ' is_used: true'
print >> f, ' }'
print >> f, '}'
f.close()
data_feed = fluid.DataFeedDesc('data.proto')

However, users usually shouldn't care about the message format; instead,
they are encouragd to use :code:`Data Generator` as a tool to generate a
Expand All @@ -54,16 +58,23 @@ class DataFeedDesc(object):

DataFeedDesc can also be changed during runtime. Once you got familiar with
what each field mean, you can modify it to better suit your need. E.g.:
>>> data_feed.set_batch_size(128)
>>> data_feed.set_dense_slots('wd') # The slot named 'wd' will be dense
>>> data_feed.set_use_slots('wd') # The slot named 'wd' will be used

.. code-block:: python

data_feed = fluid.DataFeedDesc('data.proto')
data_feed.set_batch_size(128)
data_feed.set_dense_slots('wd') # The slot named 'wd' will be dense
data_feed.set_use_slots('wd') # The slot named 'wd' will be used

Finally, the content can be dumped out for debugging purpose:
>>> print(data_feed.desc())

.. code-block:: python

print(data_feed.desc())

Args:
proto_file(string): Disk file containing a data feed description.

"""

def __init__(self, proto_file):
Expand All @@ -82,8 +93,28 @@ def set_batch_size(self, batch_size):
Set batch size. Will be effective during training

Example:
>>> data_feed = fluid.DataFeedDesc('data.proto')
>>> data_feed.set_batch_size(128)
.. code-block:: python

f = open("data.proto", "w")
print >> f, 'name: "MultiSlotDataFeed"'
print >> f, 'batch_size: 2'
print >> f, 'multi_slot_desc {'
print >> f, ' slots {'
print >> f, ' name: "words"'
print >> f, ' type: "uint64"'
print >> f, ' is_dense: false'
print >> f, ' is_used: true'
print >> f, ' }'
print >> f, ' slots {'
print >> f, ' name: "label"'
print >> f, ' type: "uint64"'
print >> f, ' is_dense: false'
print >> f, ' is_used: true'
print >> f, ' }'
print >> f, '}'
f.close()
data_feed = fluid.DataFeedDesc('data.proto')
data_feed.set_batch_size(128)

Args:
batch_size: batch size
Expand All @@ -98,8 +129,28 @@ def set_dense_slots(self, dense_slots_name):
sparse slot will be fed into a LoDTensor

Example:
>>> data_feed = fluid.DataFeedDesc('data.proto')
>>> data_feed.set_dense_slots(['words'])
.. code-block:: python

f = open("data.proto", "w")
print >> f, 'name: "MultiSlotDataFeed"'
print >> f, 'batch_size: 2'
print >> f, 'multi_slot_desc {'
print >> f, ' slots {'
print >> f, ' name: "words"'
print >> f, ' type: "uint64"'
print >> f, ' is_dense: false'
print >> f, ' is_used: true'
print >> f, ' }'
print >> f, ' slots {'
print >> f, ' name: "label"'
print >> f, ' type: "uint64"'
print >> f, ' is_dense: false'
print >> f, ' is_used: true'
print >> f, ' }'
print >> f, '}'
f.close()
data_feed = fluid.DataFeedDesc('data.proto')
data_feed.set_dense_slots(['words'])

Args:
dense_slots_name: a list of slot names which will be set dense
Expand All @@ -109,7 +160,7 @@ def set_dense_slots(self, dense_slots_name):
"""
if self.proto_desc.name != "MultiSlotDataFeed":
raise ValueError(
"Only MultiSlotDataFeed need set_dense_slots, pls check your datafeed.proto"
"Only MultiSlotDataFeed needs set_dense_slots, please check your datafeed.proto"
)
for name in dense_slots_name:
self.proto_desc.multi_slot_desc.slots[self.__name_to_index[
Expand All @@ -122,8 +173,28 @@ def set_use_slots(self, use_slots_name):
ones will be used for a specific model.

Example:
>>> data_feed = fluid.DataFeedDesc('data.proto')
>>> data_feed.set_use_slots(['words'])
.. code-block:: python

f = open("data.proto", "w")
print >> f, 'name: "MultiSlotDataFeed"'
print >> f, 'batch_size: 2'
print >> f, 'multi_slot_desc {'
print >> f, ' slots {'
print >> f, ' name: "words"'
print >> f, ' type: "uint64"'
print >> f, ' is_dense: false'
print >> f, ' is_used: true'
print >> f, ' }'
print >> f, ' slots {'
print >> f, ' name: "label"'
print >> f, ' type: "uint64"'
print >> f, ' is_dense: false'
print >> f, ' is_used: true'
print >> f, ' }'
print >> f, '}'
f.close()
data_feed = fluid.DataFeedDesc('data.proto')
data_feed.set_use_slots(['words'])

Args:
use_slots_name: a list of slot names which will be used in training
Expand All @@ -133,7 +204,7 @@ def set_use_slots(self, use_slots_name):
"""
if self.proto_desc.name != "MultiSlotDataFeed":
raise ValueError(
"Only MultiSlotDataFeed need set_use_slots, pls check your datafeed.proto"
"Only MultiSlotDataFeed needs set_use_slots, please check your datafeed.proto"
)
for name in use_slots_name:
self.proto_desc.multi_slot_desc.slots[self.__name_to_index[
Expand All @@ -144,8 +215,28 @@ def desc(self):
Returns a protobuf message for this DataFeedDesc

Example:
>>> data_feed = fluid.DataFeedDesc('data.proto')
>>> print(data_feed.desc())
.. code-block:: python

f = open("data.proto", "w")
print >> f, 'name: "MultiSlotDataFeed"'
print >> f, 'batch_size: 2'
print >> f, 'multi_slot_desc {'
print >> f, ' slots {'
print >> f, ' name: "words"'
print >> f, ' type: "uint64"'
print >> f, ' is_dense: false'
print >> f, ' is_used: true'
print >> f, ' }'
print >> f, ' slots {'
print >> f, ' name: "label"'
print >> f, ' type: "uint64"'
print >> f, ' is_dense: false'
print >> f, ' is_used: true'
print >> f, ' }'
print >> f, '}'
f.close()
data_feed = fluid.DataFeedDesc('data.proto')
print(data_feed.desc())

Returns:
A string message
Expand Down