Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions paddle/operators/crf_decoding_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,17 +36,18 @@ class CRFDecodingOpMaker : public framework::OpProtoAndCheckerMaker {
"w. See more details in comments of the linear_chain_crf operator.");
AddInput(
"Label",
"(LoDTensor, LoDTensor<int>). The ground truth with shape "
"(LoDTensor, LoDTensor<int64_t>). The ground truth with shape "
"[N x 1]. This input is optional. See more details in the operator's "
"comments.")
.AsDispensable();
AddOutput("ViterbiPath",
"(LoDTensor, LoDTensor<int>). The decoding results. What to "
"return changes depending on whether the Input(Label) (the groud "
"truth) is given. See more details in the operator's comment.");
AddOutput(
"ViterbiPath",
"(LoDTensor, LoDTensor<int64_t>). The decoding results. What to "
"return changes depending on whether the Input(Label) (the ground "
"truth) is given. See more details in the operator's comment.");
AddComment(R"DOC(
The crf_decoding operator reads the emission feature weights and the transition
freature weights learned by the linear_chain_crf operator. It implements the
feature weights learned by the linear_chain_crf operator. It implements the
Viterbi algorithm which is a dynamic programming algorithm for finding the most
likely sequence of hidden states, called the Viterbi path, that results in a
sequence of observed tags.
Expand All @@ -60,14 +61,14 @@ operator.

When Input(Label) is given, the crf_decoding operator returns a row vector
with shape [N x 1] whose values are fixed to be 0, indicating an incorrect
prediction, or 1 indicating a tag is correctly predicted. Such an ouput is the
prediction, or 1 indicating a tag is correctly predicted. Such an output is the
input to chunk_eval operator.

2. Input(Label) is not given:

This is the standard decoding process.

The crf_decoding operator returns a row vecotr with shape [N x 1] whose values
The crf_decoding operator returns a row vector with shape [N x 1] whose values
range from 0 to maximum tag number - 1. Each element indicates an index of a
predicted tag.
)DOC");
Expand Down
10 changes: 5 additions & 5 deletions paddle/operators/crf_decoding_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ class CRFDecodingOpKernel : public framework::OpKernel<T> {
const size_t level = 0;
const size_t seq_num = lod[level].size() - 1;

int* path = decoded_path->mutable_data<int>(platform::CPUPlace());
math::SetConstant<platform::CPUPlace, int>()(ctx.device_context(),
decoded_path, 0);
int64_t* path = decoded_path->mutable_data<int64_t>(platform::CPUPlace());
math::SetConstant<platform::CPUPlace, int64_t>()(ctx.device_context(),
decoded_path, 0);
for (size_t i = 0; i < seq_num; ++i) {
int start_pos = static_cast<int>(lod[level][i]);
int end_pos = static_cast<int>(lod[level][i + 1]);
Expand All @@ -57,7 +57,7 @@ class CRFDecodingOpKernel : public framework::OpKernel<T> {
if (label) {
PADDLE_ENFORCE_EQ(label->NumLevels(), 1UL,
"The Input(Label) should be a sequence.");
const int* label_value = label->data<int>();
const int64_t* label_value = label->data<int64_t>();
size_t batch_size = emission_weights->dims()[0];
for (size_t i = 0; i < batch_size; ++i) {
path[i] = label_value[i] == path[i] ? 1 : 0;
Expand All @@ -76,7 +76,7 @@ class CRFDecodingOpKernel : public framework::OpKernel<T> {

const T* x = emission_weights.data<T>();
const T* w = transition_weights.data<T>();
int* path = decoded_path->data<int>();
int64_t* path = decoded_path->data<int64_t>();

// alpha is a memo table. An element alpha(k, v) records the score of the
// best sequence of tags from position 1 to position k with v being the end
Expand Down
2 changes: 1 addition & 1 deletion python/paddle/v2/fluid/framework.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ def __init__(self,

def find_name(var_list, name):
    """Return True when ``name`` is a key of ``var_list`` bound to a non-None value.

    Args:
        var_list: mapping-like container; it is iterated for its keys and
            indexed by each key.
        name: the key to look for.

    Returns:
        bool: True only if some key equals ``name`` and the value stored
        under that key is not None; False otherwise (including when
        ``var_list`` is empty).
    """
    # An entry whose value is None is treated the same as a missing entry.
    return any(var_name == name and var_list[var_name] is not None
               for var_name in var_list)

Expand Down
8 changes: 7 additions & 1 deletion python/paddle/v2/fluid/layer_helper.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import copy
import itertools

from framework import Variable, default_main_program, default_startup_program, \
from framework import Variable, Parameter, default_main_program, default_startup_program, \
unique_name, dtype_is_floating
from paddle.v2.fluid.initializer import Constant, Xavier
from param_attr import ParamAttr
Expand Down Expand Up @@ -122,6 +122,12 @@ def create_parameter(self,
return self.main_program.global_block().create_parameter(
dtype=dtype, shape=shape, **attr.to_kwargs())

def get_parameter(self, name):
    """Look up ``name`` in the main program's global block as a Parameter.

    Args:
        name: name passed to the global block's ``var`` lookup.

    Returns:
        The object returned by the lookup, when it is a ``Parameter``.

    Raises:
        ValueError: if the object found under ``name`` is not a ``Parameter``.
    """
    candidate = self.main_program.global_block().var(name)
    if isinstance(candidate, Parameter):
        return candidate
    raise ValueError("no Parameter name %s found" % name)

def create_tmp_variable(self, dtype):
return self.main_program.current_block().create_var(
name=unique_name(".".join([self.name, 'tmp'])),
Expand Down
18 changes: 18 additions & 0 deletions python/paddle/v2/fluid/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,24 @@ def linear_chain_crf(input,
return log_likelihood


def crf_decoding(input,
                 param_attr,
                 label=None,
                 main_program=None,
                 startup_program=None):
    """Append a ``crf_decoding`` operator and return its ViterbiPath output.

    Args:
        input: variable fed to the operator as ``Emission``.
        param_attr: attribute object whose ``name`` identifies the Parameter
            fed as ``Transition``. The parameter is fetched with
            ``helper.get_parameter`` and is not created here.
        label: optional variable fed as ``Label``; it is handed to the
            operator as-is, including when it is None.
        main_program: not used directly; forwarded to ``LayerHelper``
            through ``locals()``.
        startup_program: not used directly; forwarded to ``LayerHelper``
            through ``locals()``.

    Returns:
        The temporary variable bound to the operator's ``ViterbiPath`` output.
    """
    # Keep this as the first statement: ``locals()`` must hold only the
    # function arguments when it is expanded into keyword arguments.
    helper = LayerHelper('crf_decoding', **locals())
    transition = helper.get_parameter(param_attr.name)
    # The operator documents and writes ViterbiPath as int64, so the output
    # variable is declared as int64 rather than with the emission input's dtype.
    viterbi_path = helper.create_tmp_variable(dtype='int64')
    helper.append_op(
        type='crf_decoding',
        inputs={"Emission": [input],
                "Transition": transition,
                "Label": label},
        outputs={"ViterbiPath": [viterbi_path]})

    return viterbi_path


def assign(input, output, main_program=None, startup_program=None):
helper = LayerHelper('assign', **locals())
helper.append_op(
Expand Down
12 changes: 9 additions & 3 deletions python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,12 +137,19 @@ def main():
param_attr=fluid.ParamAttr(
name='crfw', learning_rate=mix_hidden_lr))
avg_cost = fluid.layers.mean(x=crf_cost)

# TODO(qiao)
# 1. add crf_decode_layer and evaluator
# 2. use other optimizer and check why out will be NAN
# check other optimizers and check why out will be NAN
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.0001)
sgd_optimizer.minimize(avg_cost)

# TODO(qiao)
# add dependency track and move this config before optimizer
crf_decode = fluid.layers.crf_decoding(
input=feature_out,
label=target,
param_attr=fluid.ParamAttr(name='crfw'))

train_data = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.conll05.test(), buf_size=8192),
Expand All @@ -168,7 +175,6 @@ def main():
feed=feeder.feed(data),
fetch_list=[avg_cost])
avg_cost_val = np.array(outs[0])

if batch_id % 10 == 0:
print("avg_cost=" + str(avg_cost_val))

Expand Down
7 changes: 6 additions & 1 deletion python/paddle/v2/fluid/tests/test_layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.nets as nets
from paddle.v2.fluid.framework import Program, program_guard
from paddle.v2.fluid.param_attr import ParamAttr


class TestBook(unittest.TestCase):
Expand Down Expand Up @@ -132,8 +133,12 @@ def test_linear_chain_crf(self):
images = layers.data(name='pixel', shape=[784], dtype='float32')
label = layers.data(name='label', shape=[1], dtype='int32')
hidden = layers.fc(input=images, size=128)
crf = layers.linear_chain_crf(input=hidden, label=label)
crf = layers.linear_chain_crf(
input=hidden, label=label, param_attr=ParamAttr(name="crfw"))
crf_decode = layers.crf_decoding(
input=hidden, param_attr=ParamAttr(name="crfw"))
self.assertNotEqual(crf, None)
self.assertNotEqual(crf_decode, None)

print(str(program))

Expand Down