From ae97f1a6289b03eb249e87518976b72faede6204 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Sat, 23 Sep 2017 01:51:58 +0800 Subject: [PATCH 1/3] Initial version --- python/paddle/v2/framework/tests/op_test.py | 597 +++++++++++------- .../framework/tests/test_gradient_checker.py | 36 +- .../tests/test_modified_huber_loss_op.py | 4 +- 3 files changed, 417 insertions(+), 220 deletions(-) diff --git a/python/paddle/v2/framework/tests/op_test.py b/python/paddle/v2/framework/tests/op_test.py index 0a5673868c547d..2294f7be35daa2 100644 --- a/python/paddle/v2/framework/tests/op_test.py +++ b/python/paddle/v2/framework/tests/op_test.py @@ -5,175 +5,313 @@ from paddle.v2.framework.op import Operator -def grad_var_name(var_name): - return var_name + "@GRAD" - - -def create_op(scope, op_type, inputs, outputs, attrs): - kwargs = dict() - - for in_name, in_dup in Operator.get_op_inputs(op_type): - if in_name in inputs: - kwargs[in_name] = [] - if in_dup: - sub_in = inputs[in_name] - for sub_in_name, _ in sub_in: - var = scope.new_var(sub_in_name) - kwargs[in_name].append(sub_in_name) - else: - var = scope.new_var(in_name) - kwargs[in_name].append(in_name) - - for out_name, out_dup in Operator.get_op_outputs(op_type): - if out_name in outputs: - kwargs[out_name] = [] - if out_dup: - sub_out = outputs[out_name] - for sub_out_name, _ in sub_out: - var = scope.new_var(sub_out_name) - kwargs[out_name].append(sub_out_name) - else: - var = scope.new_var(out_name) - kwargs[out_name].append(out_name) - - for attr_name in Operator.get_op_attr_names(op_type): - if attr_name in attrs: - kwargs[attr_name] = attrs[attr_name] - - return Operator(op_type, **kwargs) - - -def set_input(scope, op, inputs, place): - for in_name, in_dup in Operator.get_op_inputs(op.type()): - if in_name in inputs: - if in_dup: - sub_in = inputs[in_name] - for sub_in_name, sub_in_val in sub_in: - var = scope.find_var(sub_in_name) - tensor = var.get_tensor() - sub_in_array = sub_in_val[0] \ - if isinstance(sub_in_val, tuple) else sub_in_val - tensor.set_dims(sub_in_array.shape) - tensor.set(sub_in_array, place) - if isinstance(sub_in_val, tuple): - tensor.set_lod(sub_in_val[1]) - else: - var = scope.find_var(in_name) - tensor = var.get_tensor() - in_val = inputs[in_name] - in_array = in_val[0] if isinstance(in_val, tuple) else in_val - tensor.set_dims(in_array.shape) - tensor.set(in_array, place) - if isinstance(in_val, tuple): - tensor.set_lod(in_val[1]) - - -def set_output_grad(scope, op, outputs, place): - for out_name, out_dup in Operator.get_op_outputs(op.type()): - if out_name in outputs: - if out_dup: - sub_out = outputs[out_name] - for sub_out_name, _ in sub_out: - out_tensor = scope.find_var(sub_out_name).get_tensor() - grad_tensor = scope.new_var(grad_var_name( - sub_out_name)).get_tensor() - grad_tensor.set_dims(out_tensor.shape()) - data = np.ones(out_tensor.shape(), dtype=np.float32) - grad_tensor.set(data, place) - else: - out_tensor = scope.find_var(out_name).get_tensor() - grad_tensor = scope.new_var(grad_var_name(out_name)).get_tensor( - ) - grad_tensor.set_dims(out_tensor.shape()) - data = np.ones(out_tensor.shape(), dtype=np.float32) - grad_tensor.set(data, place) - - -def get_numeric_gradient(scope, - op, - inputs, - input_to_check, - output_names, - delta=0.005, - in_place=False): - - set_input(scope, op, inputs, core.CPUPlace()) - op.infer_shape(scope) - - tensor_to_check = scope.find_var(input_to_check).get_tensor() - - def product(dim): +# TODO(pkuyym) simplify the code and add more comments +class TestUtils(object): 
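+    # Utility class shared by operator tests: creates operators and scope
+    # variables, feeds input tensors, and computes numeric (finite-difference)
+    # and analytic (backward-op) gradients, optionally as full Jacobians.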
+ @classmethod + def grad_var_name(cls, var_name): + return var_name + "@GRAD" + + @classmethod + def get_tensor_by_name(cls, scope, var_name): + var = scope.find_var(var_name) + return var.get_tensor() + + @classmethod + def create_variable(cls, scope, var_name, value=None, place=None): + var = scope.new_var(var_name) + if value is not None: + assert place is not None, \ + 'Place must be specified if value provided' + tensor = var.get_tensor() + tensor.set_dims(value.shape) + tensor.set(value, place) + + @classmethod + def create_op(cls, scope, op_type, inputs, outputs, attrs): + kwargs = dict() + # parepare parameters for creating operator + for in_var_name, is_dup in Operator.get_op_inputs(op_type): + if in_var_name in inputs: + kwargs[in_var_name] = [] + if is_dup: + sub_vars = inputs[in_var_name] + for sub_var_name, _ in sub_vars: + cls.create_variable(scope, sub_var_name) + kwargs[in_var_name].append(sub_var_name) + else: + cls.create_variable(scope, in_var_name) + kwargs[in_var_name].append(in_var_name) + + for out_var_name, is_dup in Operator.get_op_outputs(op_type): + if out_var_name in outputs: + kwargs[out_var_name] = [] + if is_dup: + sub_vars = outputs[out_var_name] + for sub_var_name, _ in sub_vars: + cls.create_variable(scope, sub_var_name) + kwargs[out_var_name].append(sub_var_name) + else: + cls.create_variable(scope, out_var_name) + kwargs[out_var_name].append(out_var_name) + + for attr_name in Operator.get_op_attr_names(op_type): + if attr_name in attrs: + kwargs[attr_name] = attrs[attr_name] + + return Operator(op_type, **kwargs) + + @classmethod + def get_backward_op(cls, scope, op, no_grad_set): + backward_op = core.Operator.backward(op, no_grad_set) + for in_var_name in backward_op.input_vars(): + cls.create_variable(scope, in_var_name) + for out_var_name in backward_op.output_vars(): + cls.create_variable(scope, out_var_name) + return backward_op + + @classmethod + def _feed_var(cls, scope, var_name, value, place): + tensor = cls.get_tensor_by_name(scope, var_name) + lod_info = None + if isinstance(value, tuple): + data = value[0] + lod_info = value[1] + else: + data = value + tensor.set_dims(data.shape) + tensor.set(data, place) + if lod_info is not None: + tensor.set_lod(lod_info) + + @classmethod + def feed_input(cls, scope, op, inputs, place): + for in_var_name, is_dup in Operator.get_op_inputs(op.type()): + if in_var_name in inputs: + if is_dup: + sub_vars = inputs[in_var_name] + for sub_var_name, sub_var_val in sub_vars: + cls._feed_var(scope, sub_var_name, sub_var_val, place) + else: + in_var_val = inputs[in_var_name] + cls._feed_var(scope, in_var_name, in_var_val, place) + + @classmethod + def dim_to_size(cls, dim): return reduce(lambda a, b: a * b, dim, 1) - ctx = core.DeviceContext.create(core.CPUPlace()) - - def get_output(): - sum = 0.0 - for output_name in output_names: - op.run(scope, ctx) - sum += np.array(scope.find_var(output_name).get_tensor()).sum() - return sum - - tensor_to_check = scope.find_var(input_to_check).get_tensor() - tensor_size = product(tensor_to_check.get_dims()) - gradient_flat = np.zeros(shape=(tensor_size, ), dtype='float32') - # we only compute gradient of one element each time. - # we use a for loop to compute the gradient of every element. - for i in xrange(tensor_size): - if in_place: - set_input(scope, op, inputs, core.CPUPlace()) - - # get one input element throw it's index i. - origin = tensor_to_check.get_float_element(i) - # add delta to it, run op and then get the sum of the result tensor. 
- x_pos = origin + delta - tensor_to_check.set_float_element(i, x_pos) - y_pos = get_output() - - if in_place: - set_input(scope, op, inputs, core.CPUPlace()) - - x_neg = origin - delta - tensor_to_check.set_float_element(i, x_neg) - y_neg = get_output() - - tensor_to_check.set_float_element(i, origin) - gradient_flat[i] = (y_pos - y_neg) / delta / 2 - - return gradient_flat.reshape(tensor_to_check.get_dims()) - - -def get_backward_op(scope, op, no_grad_set): - backward_op = core.Operator.backward(op, no_grad_set) - for input in backward_op.input_vars(): - var = scope.new_var(input) - var.get_tensor() - for output in backward_op.output_vars(): - var = scope.new_var(output) - var.get_tensor() - return backward_op - - -def get_gradient(scope, op, inputs, outputs, grad_name, place, - no_grad_set=None): - ctx = core.DeviceContext.create(place) - - set_input(scope, op, inputs, place) - - op.infer_shape(scope) - op.run(scope, ctx) - - if no_grad_set is None: - no_grad_set = set() - - backward_op = get_backward_op(scope, op, no_grad_set) - set_output_grad(scope, op, outputs, place) + @classmethod + def get_numeric_gradient(cls, + scope, + op, + inputs, + input_to_check, + output_names, + delta=0.005, + in_place=False, + strict=False): + # compute numeric gradients on CPU + cpu_place = core.CPUPlace() + cls.feed_input(scope, op, inputs, cpu_place) + op.infer_shape(scope) + ctx = core.DeviceContext.create(cpu_place) + + tensor_to_check = cls.get_tensor_by_name(scope, input_to_check) + tensor_size = cls.dim_to_size(tensor_to_check.get_dims()) + x_pos_jacobian = np.zeros((tensor_size, 0), dtype=np.float64) + x_neg_jacobian = np.zeros((tensor_size, 0), dtype=np.float64) + + def concat_flatten_output(row, jacobian_matrix): + if jacobian_matrix.shape[1] == 0: + # first time, concate output dynamically + output_vals = [] + for output_name in output_names: + op.run(scope, ctx) + output_val = np.array( + cls.get_tensor_by_name(scope, output_name)).flatten() + output_vals = np.append(output_vals, output_val) + # get dimension info, allocate memory + jacobian_matrix.resize( + (tensor_size, len(output_vals)), refcheck=False) + jacobian_matrix[row, :] = output_vals.flatten() + else: + start_idx = 0 + for output_name in output_names: + op.run(scope, ctx) + output_val = np.array( + cls.get_tensor_by_name(scope, output_name)).flatten() + jacobian_matrix[row, start_idx:start_idx+len(output_val)] \ + = output_val + start_idx += len(output_val) + + for i in xrange(tensor_size): + if in_place: + cls.feed_input(scope, op, inputs, cpu_place) + origin_val = tensor_to_check.get_float_element(i) + x_pos = origin_val + delta + tensor_to_check.set_float_element(i, x_pos) + concat_flatten_output(i, x_pos_jacobian) + if in_place: + cls.feed_input(scope, op, inputs, cpu_place) + x_neg = origin_val - delta + tensor_to_check.set_float_element(i, x_neg) + concat_flatten_output(i, x_neg_jacobian) + tensor_to_check.set_float_element(i, origin_val) + + grad_jacobian = (x_pos_jacobian - x_neg_jacobian) / delta / 2 + # return numeric gradient jacobian matrix + if strict == False: + return grad_jacobian.sum(axis=1).reshape(tensor_to_check.shape()) + return grad_jacobian + + # TODO(pkuyym) should pass output_names not outputs + @classmethod + def get_simple_analytic_grads(cls, + scope, + op, + inputs, + outputs, + grad_name, + place, + no_grad_set=None): + ctx = core.DeviceContext.create(place) + cls.feed_input(scope, op, inputs, place) + # run forward + op.infer_shape(scope) + op.run(scope, ctx) - backward_op.infer_shape(scope) - 
backward_op.run(scope, ctx) + if no_grad_set is None: + no_grad_set = set() - out = np.array(scope.find_var(grad_name).get_tensor()) - return out + backward_op = cls.get_backward_op(scope, op, no_grad_set) + # feed Input(Out@Grad), just set to one for all values + for out_var_name, is_dup in Operator.get_op_outputs(op.type()): + if out_var_name in outputs: + if is_dup: + sub_vars = outputs[out_var_name] + for sub_var_name, _ in sub_vars: + out_var_tensor = cls.get_tensor_by_name(scope, + sub_var_name) + data = np.ones(out_var_tensor.shape(), dtype=np.float64) + cls.create_variable( + scope, + cls.grad_var_name(sub_var_name), + value=data, + place=place) + else: + out_var_tensor = cls.get_tensor_by_name(scope, out_var_name) + data = np.ones(out_var_tensor.shape(), np.float64) + cls.create_variable( + scope, + cls.grad_var_name(out_var_name), + value=data, + place=place) + + backward_op.infer_shape(scope) + backward_op.run(scope, ctx) + out = np.array(cls.get_tensor_by_name(scope, grad_name)) + return out + + @classmethod + def get_out_var_shapes(cls, scope, op, outputs): + out_var_shapes = [] + for out_var_name, is_dup in Operator.get_op_outputs(op.type()): + if out_var_name in outputs: + if is_dup: + sub_vars = outputs[out_var_name] + for sub_var_name, _ in sub_vars: + out_var_tensor = cls.get_tensor_by_name(scope, + sub_var_name) + out_var_shapes.append( + (sub_var_name, out_var_tensor.shape())) + else: + out_var_tensor = cls.get_tensor_by_name(scope, out_var_name) + out_var_shapes.append( + (out_var_name, out_var_tensor.shape())) + return out_var_shapes + + # TODO(pkuyym) should pass output_names not outputs + @classmethod + def get_jacobian_analytic_grads(cls, + scope, + op, + inputs, + outputs, + grad_name, + place, + no_grad_set=None): + # only run forward one time + ctx = core.DeviceContext.create(place) + cls.feed_input(scope, op, inputs, place) + op.infer_shape(scope) + op.run(scope, ctx) + + # get shape for each outputs, may pass by outside + out_var_shapes = cls.get_out_var_shapes(scope, op, outputs) + accum_size = np.zeros((len(out_var_shapes)), dtype=np.int32) + var_shape_idx = {} + for i in xrange(len(out_var_shapes)): + accum_size[i] = cls.dim_to_size(out_var_shapes[i][1]) + \ + (accum_size[i - 1] if i > 0 else 0) + var_shape_idx[out_var_shapes[i][0]] = i + + out_grad_values = np.zeros(accum_size[-1], dtype=np.float64) + x_grad_jacobian = None + + backward_op = cls.get_backward_op(scope, op, no_grad_set) + + def fill_tensor(tensor_name, tensor, place): + tensor_shape = tensor.shape() + if tensor_name.encode('utf-8') in var_shape_idx: + idx = var_shape_idx[tensor_name] + start = accum_size[idx - 1] if idx > 0 else 0 + data = out_grad_values[start:accum_size[idx]].reshape( + tensor_shape) + else: + data = np.zeros(tensor_shape, dtype=np.float64) + tensor.set(data, place) + + for i in xrange(accum_size[-1]): + # each time set 1 to one value + out_grad_values[i] = 1 + # feed Input(Out@Grad) + for out_var_name, is_dup in Operator.get_op_outputs(op.type()): + if out_var_name in outputs: + if is_dup: + sub_vars = outputs[out_var_name] + for sub_var_name, _ in sub_vars: + out_var_tensor = cls.get_tensor_by_name( + scope, sub_var_name) + cls.create_variable(scope, + cls.grad_var_name(sub_var_name)) + out_grad_tensor = cls.get_tensor_by_name( + scope, cls.grad_var_name(sub_var_name)) + out_grad_tensor.set_dims(out_var_tensor.shape()) + fill_tensor(var_name, out_grad_tensor, place) + else: + out_var_tensor = cls.get_tensor_by_name(scope, + out_var_name) + cls.create_variable(scope, + 
cls.grad_var_name(out_var_name)) + out_grad_tensor = cls.get_tensor_by_name( + scope, cls.grad_var_name(out_var_name)) + out_grad_tensor.set_dims(out_var_tensor.shape()) + fill_tensor(out_var_name, out_grad_tensor, place) + + if no_grad_set is None: + no_grad_set = set() + + backward_op.infer_shape(scope) + backward_op.run(scope, ctx) + # fill input gradient jacobian matrix + x_grad_col = np.array(cls.get_tensor_by_name(scope, grad_name)) + if x_grad_jacobian is None: + # get shape info, allocat memory + x_grad_jacobian = np.zeros((x_grad_col.size, accum_size[-1])) + x_grad_jacobian[:, i] = x_grad_col.flatten() + # reset to zero + out_grad_values[i] = 0 + + return x_grad_jacobian class OpTest(unittest.TestCase): @@ -182,39 +320,43 @@ def check_output_with_place(self, place): op_inputs = self.inputs if hasattr(self, "inputs") else dict() op_outputs = self.outputs if hasattr(self, "outputs") else dict() op_attrs = self.attrs if hasattr(self, "attrs") else dict() - self.op = create_op(self.scope, self.op_type, op_inputs, op_outputs, - op_attrs) + self.op = TestUtils.create_op(self.scope, self.op_type, op_inputs, + op_outputs, op_attrs) + if isinstance(place, core.GPUPlace) and not self.op.support_gpu(): return - set_input(self.scope, self.op, self.inputs, place) + + TestUtils.feed_input(self.scope, self.op, self.inputs, place) self.op.infer_shape(self.scope) ctx = core.DeviceContext.create(place) self.op.run(self.scope, ctx) - for out_name, out_dup in Operator.get_op_outputs(self.op.type()): - if out_name not in self.outputs: + for out_var_name, is_dup in Operator.get_op_outputs(self.op.type()): + if out_var_name not in self.outputs: continue - if out_dup: - sub_out = self.outputs[out_name] - if not isinstance(sub_out, list): + if is_dup: + sub_vars = self.outputs[out_var_name] + if not isinstance(sub_vars, list): raise AssertionError("sub_out type %s is not list", type(sub_out)) - for sub_out_name, expect in sub_out: - actual = np.array( - self.scope.find_var(sub_out_name).get_tensor()) + for sub_var_name, expect_val in sub_vars: + actual_val = np.array( + TestUtils.get_tensor_by_name(self.scope, sub_var_name)) + self.assertTrue( np.allclose( - actual, expect, atol=1e-05), - "output name: " + out_name + " has diff") + actual_val, expect_val, atol=1e-05), + "output name: " + out_var_name + " has diff") else: - actual = np.array(self.scope.find_var(out_name).get_tensor()) - expect = self.outputs[out_name] + actual_val = np.array( + TestUtils.get_tensor_by_name(self.scope, out_var_name)) + expect_val = self.outputs[out_var_name] self.assertTrue( np.allclose( - actual, expect, atol=1e-05), - "output name: " + out_name + " has diff") + actual_val, expect_val, atol=1e-05), + "output name: " + out_var_name + " has diff") def check_output(self): places = [core.CPUPlace()] @@ -223,20 +365,18 @@ def check_output(self): for place in places: self.check_output_with_place(place) - def __assert_is_close(self, numeric_grads, analytic_grads, names, - max_relative_error, msg_prefix): - + def _assert_is_close(self, numeric_grads, analytic_grads, names, + max_relative_error, msg_prefix): for a, b, name in itertools.izip(numeric_grads, analytic_grads, names): abs_a = np.abs(a) abs_a[abs_a < 1e-3] = 1 - diff_mat = np.abs(a - b) / abs_a max_diff = np.max(diff_mat) def err_msg(): offset = np.argmax(diff_mat > max_relative_error) - return "%s Variable %s max gradient diff %f over limit %f, the first " \ - "error element is %d" % ( + return "%s Variable %s max gradient diff %f over limit %f, "\ + "the first error 
element is %d" % ( msg_prefix, name, max_diff, max_relative_error, offset) self.assertLessEqual(max_diff, max_relative_error, err_msg()) @@ -246,54 +386,89 @@ def check_grad(self, output_names, no_grad_set=None, in_place=False, - max_relative_error=0.005): + max_relative_error=0.005, + strict=False): self.scope = core.Scope() op_inputs = self.inputs if hasattr(self, "inputs") else dict() op_outputs = self.outputs if hasattr(self, "outputs") else dict() op_attrs = self.attrs if hasattr(self, "attrs") else dict() - self.op = create_op(self.scope, self.op_type, op_inputs, op_outputs, - op_attrs) + self.op = TestUtils.create_op(self.scope, self.op_type, op_inputs, + op_outputs, op_attrs) + if no_grad_set is None: no_grad_set = set() if not type(output_names) is list: output_names = [output_names] + involved_outputs = {} + for key, val in self.outputs.items(): + if key in output_names: + involved_outputs[key] = val + elif isinstance(val, list): + sub_outs = [] + for sub_var_name, sub_var_val in val: + if sub_var_name in output_names: + sub_outs.append((sub_var_name, sub_var_val)) + involved_outputs[key] = sub_outs + numeric_grads = [ - get_numeric_gradient( + TestUtils.get_numeric_gradient( self.scope, self.op, self.inputs, input_to_check, output_names, - in_place=in_place) for input_to_check in inputs_to_check + in_place=in_place, + strict=strict) for input_to_check in inputs_to_check ] + grad_names = [ - grad_var_name(input_to_check) for input_to_check in inputs_to_check + TestUtils.grad_var_name(input_to_check) \ + for input_to_check in inputs_to_check ] cpu_place = core.CPUPlace() - cpu_analytic_grads = [ - get_gradient(self.scope, self.op, self.inputs, self.outputs, - grad_name, cpu_place, no_grad_set) - for grad_name in grad_names - ] - self.__assert_is_close(numeric_grads, cpu_analytic_grads, grad_names, - max_relative_error, - "Gradient Check On %s" % str(cpu_place)) - - if core.is_compile_gpu() and self.op.support_gpu(): - gpu_place = core.GPUPlace(0) - gpu_analytic_grads = [ - get_gradient(self.scope, self.op, self.inputs, self.outputs, - grad_name, gpu_place, no_grad_set) + if strict == False: + cpu_analytic_grads = [ + TestUtils.get_simple_analytic_grads( + self.scope, self.op, self.inputs, + involved_outputs, grad_name, cpu_place, no_grad_set) for grad_name in grad_names ] + else: + cpu_analytic_grads = [ + TestUtils.get_jacobian_analytic_grads( + self.scope, self.op, self.inputs, + involved_outputs, grad_name, cpu_place, no_grad_set) + for grad_name in grad_names + ] + + self._assert_is_close(numeric_grads, cpu_analytic_grads, grad_names, + max_relative_error, + "Gradient Check On %s" % str(cpu_place)) - self.__assert_is_close(numeric_grads, gpu_analytic_grads, - grad_names, max_relative_error, - "Gradient Check On %s" % str(gpu_place)) + if core.is_compile_gpu() and self.op.support_gpu(): + gpu_place = core.GPUPlace(0) + if strict == False: + gpu_analytic_grads = [ + TestUtils.get_simple_analytic_grads( + self.scope, self.op, self.inputs, involved_outputs, + grad_name, gpu_place, no_grad_set) + for grad_name in grad_names + ] + else: + gpu_analytic_grads = [ + TestUtils.get_jacobian_analytic_grads( + self.scope, self.op, self.inputs, involved_outputs, + grad_name, gpu_place, no_grad_set) + for grad_name in grad_names + ] + + self._assert_is_close(numeric_grads, gpu_analytic_grads, grad_names, + max_relative_error, + "Gradient Check On %s" % str(gpu_place)) for c_grad, g_grad, name in itertools.izip( cpu_analytic_grads, gpu_analytic_grads, grad_names): diff --git 
a/python/paddle/v2/framework/tests/test_gradient_checker.py b/python/paddle/v2/framework/tests/test_gradient_checker.py index 85117bf9600975..935f4c7b1df281 100644 --- a/python/paddle/v2/framework/tests/test_gradient_checker.py +++ b/python/paddle/v2/framework/tests/test_gradient_checker.py @@ -1,8 +1,7 @@ import unittest import numpy as np import paddle.v2.framework.core as core -from op_test import get_numeric_gradient -from op_test import create_op +from op_test import TestUtils class GetNumericGradientTest(unittest.TestCase): @@ -11,11 +10,32 @@ def test_add_op(self): y = np.random.random((10, 1)).astype("float32") z = x + y scope = core.Scope() - add_op = create_op(scope, "add", {'X': x, 'Y': y}, {'Out': z}, dict()) - arr = get_numeric_gradient(scope, add_op, {'X': x, - 'Y': y}, 'X', ['Out']) + add_op = TestUtils.create_op(scope, "add", {'X': x, + 'Y': y}, {'Out': z}, dict()) + arr = TestUtils.get_numeric_gradient(scope, add_op, {'X': x, + 'Y': y}, 'X', + ['Out']) self.assertAlmostEqual(arr.mean(), 1.0, delta=1e-4) + def test_add_op_strict(self): + x = np.random.random((10, 1)).astype("float32") + y = np.random.random((10, 1)).astype("float32") + z = x + y + scope = core.Scope() + add_op = TestUtils.create_op(scope, "add", {'X': x, + 'Y': y}, {'Out': z}, dict()) + arr = TestUtils.get_numeric_gradient( + scope, + add_op, {'X': x, + 'Y': y}, + 'X', ['Out'], + delta=0.0001, + in_place=False, + strict=True) + self.assertAlmostEqual( + arr.all(), np.identity( + 10, dtype=np.float64).all(), delta=1e-4) + def test_softmax_op(self): def stable_softmax(x): """Compute the softmax of vector x in a numerically stable way.""" @@ -36,9 +56,11 @@ def label_softmax_grad(Y, dY): dX = label_softmax_grad(Y, dY) scope = core.Scope() - softmax_op = create_op(scope, "softmax", {"X": X}, {"Y": Y}, dict()) + softmax_op = TestUtils.create_op(scope, "softmax", {"X": X}, {"Y": Y}, + dict()) - arr = get_numeric_gradient(scope, softmax_op, {"X": X}, "X", "Y") + arr = TestUtils.get_numeric_gradient(scope, softmax_op, {"X": X}, "X", + "Y") np.testing.assert_almost_equal(arr, dX, decimal=1e-2) diff --git a/python/paddle/v2/framework/tests/test_modified_huber_loss_op.py b/python/paddle/v2/framework/tests/test_modified_huber_loss_op.py index a7e2b57529b072..128ac146758180 100644 --- a/python/paddle/v2/framework/tests/test_modified_huber_loss_op.py +++ b/python/paddle/v2/framework/tests/test_modified_huber_loss_op.py @@ -31,8 +31,8 @@ def setUp(self): def test_check_output(self): self.check_output() - def test_check_grad(self): - self.check_grad(['X'], 'Out', max_relative_error=0.005) + def test_check_grad_strict(self): + self.check_grad(['X'], 'Out', max_relative_error=0.0001, strict=True) if __name__ == '__main__': From 9cdd04a197b560b9c847db4c9c9c43fb6c18f2b7 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Tue, 26 Sep 2017 21:33:48 +0800 Subject: [PATCH 2/3] Add more test cases and do some minor optimization. 
--- python/paddle/v2/framework/tests/op_test.py | 5 +-- .../framework/tests/test_gradient_checker.py | 43 +++++++++++++++---- .../tests/test_modified_huber_loss_op.py | 4 +- 3 files changed, 39 insertions(+), 13 deletions(-) diff --git a/python/paddle/v2/framework/tests/op_test.py b/python/paddle/v2/framework/tests/op_test.py index 2294f7be35daa2..5d64f28dd19d39 100644 --- a/python/paddle/v2/framework/tests/op_test.py +++ b/python/paddle/v2/framework/tests/op_test.py @@ -121,11 +121,11 @@ def get_numeric_gradient(cls, x_neg_jacobian = np.zeros((tensor_size, 0), dtype=np.float64) def concat_flatten_output(row, jacobian_matrix): + op.run(scope, ctx) if jacobian_matrix.shape[1] == 0: # first time, concate output dynamically output_vals = [] for output_name in output_names: - op.run(scope, ctx) output_val = np.array( cls.get_tensor_by_name(scope, output_name)).flatten() output_vals = np.append(output_vals, output_val) @@ -136,7 +136,6 @@ def concat_flatten_output(row, jacobian_matrix): else: start_idx = 0 for output_name in output_names: - op.run(scope, ctx) output_val = np.array( cls.get_tensor_by_name(scope, output_name)).flatten() jacobian_matrix[row, start_idx:start_idx+len(output_val)] \ @@ -261,7 +260,7 @@ def get_jacobian_analytic_grads(cls, def fill_tensor(tensor_name, tensor, place): tensor_shape = tensor.shape() - if tensor_name.encode('utf-8') in var_shape_idx: + if tensor_name in var_shape_idx: idx = var_shape_idx[tensor_name] start = accum_size[idx - 1] if idx > 0 else 0 data = out_grad_values[start:accum_size[idx]].reshape( diff --git a/python/paddle/v2/framework/tests/test_gradient_checker.py b/python/paddle/v2/framework/tests/test_gradient_checker.py index 935f4c7b1df281..e610d4d3cf3c94 100644 --- a/python/paddle/v2/framework/tests/test_gradient_checker.py +++ b/python/paddle/v2/framework/tests/test_gradient_checker.py @@ -25,16 +25,11 @@ def test_add_op_strict(self): add_op = TestUtils.create_op(scope, "add", {'X': x, 'Y': y}, {'Out': z}, dict()) arr = TestUtils.get_numeric_gradient( - scope, - add_op, {'X': x, - 'Y': y}, - 'X', ['Out'], - delta=0.0001, - in_place=False, - strict=True) + scope, add_op, {'X': x, + 'Y': y}, 'X', ['Out'], strict=True) self.assertAlmostEqual( arr.all(), np.identity( - 10, dtype=np.float64).all(), delta=1e-4) + 10, dtype=np.float64).all(), delta=1e-6) def test_softmax_op(self): def stable_softmax(x): @@ -63,6 +58,38 @@ def label_softmax_grad(Y, dY): "Y") np.testing.assert_almost_equal(arr, dX, decimal=1e-2) + def test_softmax_op_strict(self): + def stable_softmax(x): + """Compute the softmax of vector x in a numerically stable way.""" + shiftx = x - np.max(x) + exps = np.exp(shiftx) + return exps / np.sum(exps) + + def label_softmax_grad(Y, dY): + dX = Y * 0.0 + for i in range(Y.shape[0]): + d = np.dot(Y[i, :], dY[i, :]) + dX[i, :] = Y[i, :] * (dY[i, :] - d) + return dX + + X = np.random.random((2, 2)).astype("float32") + Y = np.apply_along_axis(stable_softmax, 1, X) + dX = np.zeros((X.size, 0)).astype("float32") + dY = np.zeros(Y.shape) + for i in xrange(X.size): + dY.ravel()[i] = 1 + dX = np.concatenate( + (dX, label_softmax_grad(Y, dY).reshape(X.size, 1)), axis=1) + dY.ravel()[i] = 0 + + scope = core.Scope() + softmax_op = TestUtils.create_op(scope, "softmax", {"X": X}, {"Y": Y}, + dict()) + + arr = TestUtils.get_numeric_gradient( + scope, softmax_op, {"X": X}, "X", "Y", strict=True) + np.testing.assert_almost_equal(arr, dX, decimal=1e-6) + if __name__ == "__main__": unittest.main() diff --git 
a/python/paddle/v2/framework/tests/test_modified_huber_loss_op.py b/python/paddle/v2/framework/tests/test_modified_huber_loss_op.py index 128ac146758180..eab3ed13bdf3d7 100644 --- a/python/paddle/v2/framework/tests/test_modified_huber_loss_op.py +++ b/python/paddle/v2/framework/tests/test_modified_huber_loss_op.py @@ -15,9 +15,9 @@ def modified_huber_loss_forward(val): class TestModifiedHuberLossOp(OpTest): def setUp(self): self.op_type = 'modified_huber_loss' - samples_num = 32 + samples_num = 16 self.inputs = { - 'X': np.random.uniform(-1, 1., (samples_num, 1)).astype('float32'), + 'X': np.random.uniform(-1, 1., (samples_num, 1)).astype('float64'), 'Y': np.random.choice([0, 1], samples_num).reshape((samples_num, 1)) } product_res = self.inputs['X'] * (2 * self.inputs['Y'] - 1) From e84217c1cd1d1353d1448781a5a4904c4af3de0e Mon Sep 17 00:00:00 2001 From: yangyaming Date: Wed, 27 Sep 2017 11:40:20 +0800 Subject: [PATCH 3/3] Follow comments. --- python/paddle/v2/framework/tests/op_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/paddle/v2/framework/tests/op_test.py b/python/paddle/v2/framework/tests/op_test.py index 5d64f28dd19d39..90900805303c38 100644 --- a/python/paddle/v2/framework/tests/op_test.py +++ b/python/paddle/v2/framework/tests/op_test.py @@ -256,6 +256,9 @@ def get_jacobian_analytic_grads(cls, out_grad_values = np.zeros(accum_size[-1], dtype=np.float64) x_grad_jacobian = None + if no_grad_set is None: + no_grad_set = set() + backward_op = cls.get_backward_op(scope, op, no_grad_set) def fill_tensor(tensor_name, tensor, place): @@ -296,9 +299,6 @@ def fill_tensor(tensor_name, tensor, place): out_grad_tensor.set_dims(out_var_tensor.shape()) fill_tensor(out_var_name, out_grad_tensor, place) - if no_grad_set is None: - no_grad_set = set() - backward_op.infer_shape(scope) backward_op.run(scope, ctx) # fill input gradient jacobian matrix
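
Note on the `strict` mode these patches introduce: instead of reducing all outputs to a scalar sum, both the numeric and the analytic check build a full Jacobian (one row per input element, one column per output element) and compare them entry by entry. The sketch below illustrates that idea with plain NumPy on a softmax function, mirroring the softmax test case above; `softmax`, `softmax_vjp`, and the two `*_jacobian` helpers are illustrative names only and are not part of the Paddle test framework.

```python
import numpy as np

def softmax(x):
    # numerically stable softmax of a 1-D vector
    shifted = x - np.max(x)
    exps = np.exp(shifted)
    return exps / np.sum(exps)

def softmax_vjp(y, dy):
    # vector-Jacobian product of softmax: dx_i = y_i * (dy_i - sum_k y_k dy_k)
    return y * (dy - np.dot(y, dy))

def numeric_jacobian(f, x, delta=1e-4):
    # central differences: perturb one input element at a time and record
    # every output element, giving jac[i, j] = d y_j / d x_i
    x = x.astype(np.float64)
    jac = np.zeros((x.size, f(x).size))
    for i in range(x.size):
        orig = x.flat[i]
        x.flat[i] = orig + delta
        y_pos = f(x).flatten()
        x.flat[i] = orig - delta
        y_neg = f(x).flatten()
        x.flat[i] = orig
        jac[i, :] = (y_pos - y_neg) / (2.0 * delta)
    return jac

def analytic_jacobian(f, vjp, x):
    # one backward pass per output element, with a one-hot output gradient,
    # filling the Jacobian column by column
    y = f(x)
    jac = np.zeros((x.size, y.size))
    for j in range(y.size):
        dy = np.zeros_like(y)
        dy.flat[j] = 1.0
        jac[:, j] = vjp(y, dy).flatten()
    return jac

if __name__ == "__main__":
    x = np.random.random(4).astype(np.float64)
    num = numeric_jacobian(softmax, x)
    ana = analytic_jacobian(softmax, softmax_vjp, x)
    # strict check: every Jacobian entry must match, not just the row sums
    np.testing.assert_allclose(num, ana, atol=1e-6)
```

The non-strict path corresponds to summing each Jacobian over its output axis before comparing, which can hide sign errors that cancel across outputs; the strict path catches them at the cost of one forward (numeric) or backward (analytic) run per output element.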