Merged
python/paddle/trainer_config_helpers/layers.py (24 changes: 20 additions & 4 deletions)
@@ -4921,12 +4921,14 @@ def crf_decoding_layer(input,
 
 @wrap_act_default(act=SigmoidActivation())
 @wrap_bias_attr_default(has_bias=True)
+@wrap_param_attr_default()
 @wrap_name_default()
 @layer_support()
 def nce_layer(input,
               label,
-              num_classes,
+              num_classes=None,
               act=None,
+              param_attr=None,
               weight=None,
               num_neg_samples=10,
               neg_distribution=None,
@@ -4942,7 +4944,8 @@ def nce_layer(input,
 
     .. code-block:: python
 
-       cost = nce_layer(input=layer1, label=layer2, weight=layer3,
+       cost = nce_layer(input=[layer1, layer2], label=layer2,
+                        param_attr=[attr1, attr2], weight=layer3,
                         num_classes=3, neg_distribution=[0.1,0.3,0.6])
 
     :param name: layer name
@@ -4957,6 +4960,8 @@ def nce_layer(input,
     :type num_classes: int
     :param act: Activation, default is Sigmoid.
     :type act: BaseActivation
+    :param param_attr: The parameter attribute (or a list of them, one per input).
+    :type param_attr: ParameterAttribute|list
     :param num_neg_samples: number of negative samples. Default is 10.
     :type num_neg_samples: int
     :param neg_distribution: The distribution for generating the random negative labels.
@@ -4972,9 +4977,20 @@ def nce_layer(input,
     """
     if isinstance(input, LayerOutput):
         input = [input]
+        assert not isinstance(param_attr, collections.Sequence)
+        param_attr = [param_attr]
+    else:
+        if isinstance(param_attr, collections.Sequence):
+            assert len(input) == len(param_attr)
+        else:
+            param_attr = [copy.deepcopy(param_attr) for _ in range(len(input))]
+
     assert isinstance(input, collections.Sequence)
+
     assert isinstance(label, LayerOutput)
     assert label.layer_type == LayerType.DATA
+    if num_classes is None:
+        num_classes = label.size
     if neg_distribution is not None:
         assert isinstance(neg_distribution, collections.Sequence)
         assert len(neg_distribution) == num_classes

Review thread on the new `else:` branch:

Contributor: please add simple usage in annotation when input is not LayerOutput, i.e., add simple usage for the else branch.

Author: done.
@@ -4984,9 +5000,9 @@ def nce_layer(input,
 
     ipts_for_layer = []
     parents = []
-    for each_input in input:
+    for each_input, attr in zip(input, param_attr):
         assert isinstance(each_input, LayerOutput)
-        ipts_for_layer.append(each_input.name)
+        ipts_for_layer.append(Input(each_input.name, **attr.attr))
         parents.append(each_input)
     ipts_for_layer.append(label.name)
     parents.append(label)
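Taken together, the rewritten entry logic accepts three call patterns, and num_classes can now be omitted entirely (it falls back to the label layer's size). A minimal sketch of the three forms; the layer names and sizes below are illustrative, not taken from this PR:

    from paddle.trainer_config_helpers import *

    # hypothetical layers for illustration only
    feats = fc_layer(input=data_layer(name='feats_in', size=100), size=64)
    extra = fc_layer(input=data_layer(name='extra_in', size=50), size=32)
    lbl = data_layer(name='lbl', size=1000)
    attr = ParameterAttribute(initial_std=0.01)

    # if-branch: a single input and a single attribute, both wrapped into lists
    cost = nce_layer(input=feats, label=lbl, param_attr=attr)

    # else-branch, sequence case: one attribute per input, lengths must match
    cost = nce_layer(
        input=[feats, extra], label=lbl,
        param_attr=[attr, ParameterAttribute(initial_std=0.02)])

    # else-branch, scalar case: the single attribute is deep-copied per input
    cost = nce_layer(input=[feats, extra], label=lbl, param_attr=attr)

    # num_classes is omitted in all three calls, so it is inferred as lbl.size (1000);
    # in a real config you would keep only one of these calls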
@@ -215,6 +215,22 @@ layers {
   }
   coeff: 1.0
 }
+layers {
+  name: "__nce_layer_0__"
+  type: "nce"
+  size: 1
+  active_type: "sigmoid"
+  inputs {
+    input_layer_name: "__fc_layer_0__"
+    input_parameter_name: "___nce_layer_0__.w0"
+  }
+  inputs {
+    input_layer_name: "labels"
+  }
+  bias_parameter_name: "___nce_layer_0__.wbias"
+  num_classes: 5000
+  num_neg_samples: 10
+}
 parameters {
   name: "___fc_layer_0__.w0"
   size: 800
@@ -245,6 +261,26 @@ parameters {
   initial_strategy: 0
   initial_smart: true
 }
+parameters {
+  name: "___nce_layer_0__.w0"
+  size: 20000
+  initial_mean: 0.0
+  initial_std: 0.0141421356237
+  dims: 5000
+  dims: 4
+  initial_strategy: 0
+  initial_smart: true
+}
+parameters {
+  name: "___nce_layer_0__.wbias"
+  size: 5000
+  initial_mean: 0.0
+  initial_std: 0.0
+  dims: 1
+  dims: 5000
+  initial_strategy: 0
+  initial_smart: false
+}
 input_layer_names: "input"
 input_layer_names: "labels"
 input_layer_names: "crf_label"
@@ -267,6 +303,7 @@ output_layer_names: "__cross_entropy_with_selfnorm_0__"
 output_layer_names: "__huber_cost_0__"
 output_layer_names: "__multi_binary_label_cross_entropy_0__"
 output_layer_names: "__sum_cost_0__"
+output_layer_names: "__nce_layer_0__"
 sub_models {
   name: "root"
   layer_names: "input"
@@ -292,6 +329,7 @@ sub_models {
   layer_names: "__huber_cost_0__"
   layer_names: "__multi_binary_label_cross_entropy_0__"
   layer_names: "__sum_cost_0__"
+  layer_names: "__nce_layer_0__"
   input_layer_names: "input"
   input_layer_names: "labels"
   input_layer_names: "crf_label"
@@ -314,6 +352,7 @@ sub_models {
   output_layer_names: "__huber_cost_0__"
   output_layer_names: "__multi_binary_label_cross_entropy_0__"
   output_layer_names: "__sum_cost_0__"
+  output_layer_names: "__nce_layer_0__"
   is_recurrent_layer_group: false
 }
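The two new parameter blocks fit an NCE weight of shape (num_classes, input_size) plus a per-class bias, and the initial_std value is consistent with the "smart" initialization using 1/sqrt(dims[0]) (an inference from the numbers above, not something the diff states). A quick check:

    import math

    # ___nce_layer_0__.w0: dims 5000 x 4 -> size 20000
    assert 5000 * 4 == 20000
    # initial_std 0.0141421356237 matches 1/sqrt(5000)
    assert math.isclose(1 / math.sqrt(5000), 0.0141421356237)
    # ___nce_layer_0__.wbias: dims 1 x 5000 -> one bias per class
    assert 1 * 5000 == 5000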

@@ -60,6 +60,31 @@ layers {
   }
   coeff: 1.0
 }
+layers {
+  name: "multi_class_label"
+  type: "data"
+  size: 500
+  active_type: ""
+}
+layers {
+  name: "__nce_layer_0__"
+  type: "nce"
+  size: 1
+  active_type: "sigmoid"
+  inputs {
+    input_layer_name: "__fc_layer_0__"
+    input_parameter_name: "___nce_layer_0__.w0"
+  }
+  inputs {
+    input_layer_name: "multi_class_label"
+  }
+  inputs {
+    input_layer_name: "weight"
+  }
+  bias_parameter_name: "___nce_layer_0__.wbias"
+  num_classes: 500
+  num_neg_samples: 10
+}
 parameters {
   name: "___fc_layer_0__.w0"
   size: 3000
@@ -80,9 +105,30 @@ parameters {
   initial_strategy: 0
   initial_smart: false
 }
+parameters {
+  name: "___nce_layer_0__.w0"
+  size: 5000
+  initial_mean: 0.0
+  initial_std: 0.04472135955
+  dims: 500
+  dims: 10
+  initial_strategy: 0
+  initial_smart: true
+}
+parameters {
+  name: "___nce_layer_0__.wbias"
+  size: 500
+  initial_mean: 0.0
+  initial_std: 0.0
+  dims: 1
+  dims: 500
+  initial_strategy: 0
+  initial_smart: false
+}
 input_layer_names: "input"
 input_layer_names: "label"
 input_layer_names: "weight"
+input_layer_names: "multi_class_label"
 output_layer_names: "__cost_0__"
 output_layer_names: "__mse_cost_0__"
 evaluators {
@@ -100,9 +146,12 @@ sub_models {
   layer_names: "__fc_layer_0__"
   layer_names: "__cost_0__"
   layer_names: "__mse_cost_0__"
+  layer_names: "multi_class_label"
+  layer_names: "__nce_layer_0__"
   input_layer_names: "input"
   input_layer_names: "label"
   input_layer_names: "weight"
+  input_layer_names: "multi_class_label"
   output_layer_names: "__cost_0__"
   output_layer_names: "__mse_cost_0__"
   evaluator_names: "classification_error_evaluator"
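Note that only the projected input (__fc_layer_0__) carries an input_parameter_name; the label and weight inputs contribute no parameters of their own. The new parameter sizes again check out under the same assumed 1/sqrt(dims[0]) rule:

    import math

    # ___nce_layer_0__.w0: dims 500 x 10 -> size 5000
    assert 500 * 10 == 5000
    # initial_std 0.04472135955 matches 1/sqrt(500)
    assert math.isclose(1 / math.sqrt(500), 0.04472135955)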
@@ -40,4 +40,6 @@
             name='huber_label', size=1)),
     multi_binary_label_cross_entropy(
         input=probs, label=xe_label),
-    sum_cost(input=hidden))
+    sum_cost(input=hidden),
+    nce_layer(
+        input=hidden, label=labels))
@@ -11,4 +11,9 @@
     classification_cost(
         input=fc, label=lbl, weight=wt),
     mse_cost(
-        input=fc, label=lbl, weight=wt))
+        input=fc, label=lbl, weight=wt),
+    nce_layer(
+        input=fc,
+        label=data_layer(
+            name='multi_class_label', size=500),
+        weight=wt))
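Both test configs exercise the new num_classes default: neither call passes it, so it is inferred from the label layer's size (5000 for labels, 500 for multi_class_label), matching the num_classes fields in the generated configs above. Spelling the inferred value out, the second call is equivalent to this sketch (assuming the fc and wt layers from the surrounding test):

    nce_layer(
        input=fc,
        label=data_layer(name='multi_class_label', size=500),
        weight=wt,
        num_classes=500)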