diff --git a/easy_rec/python/builders/loss_builder.py b/easy_rec/python/builders/loss_builder.py index 7459372a5..ec4ab57c8 100644 --- a/easy_rec/python/builders/loss_builder.py +++ b/easy_rec/python/builders/loss_builder.py @@ -41,12 +41,18 @@ def build(loss_type, return tf.losses.mean_squared_error( labels=label, predictions=pred, weights=loss_weight, **kwargs) elif loss_type == LossType.JRC_LOSS: - alpha = 0.5 if loss_param is None else loss_param.alpha - auto_weight = False if loss_param is None else not loss_param.HasField( - 'alpha') session = kwargs.get('session_ids', None) + if loss_param is None: + return jrc_loss(label, pred, session, name=loss_name) return jrc_loss( - label, pred, session, alpha, auto_weight=auto_weight, name=loss_name) + label, + pred, + session, + loss_param.alpha, + loss_weight_strategy=loss_param.loss_weight_strategy, + sample_weights=loss_weight, + same_label_loss=loss_param.same_label_loss, + name=loss_name) elif loss_type == LossType.PAIR_WISE_LOSS: session = kwargs.get('session_ids', None) margin = 0 if loss_param is None else loss_param.margin diff --git a/easy_rec/python/compat/array_ops.py b/easy_rec/python/compat/array_ops.py new file mode 100644 index 000000000..d788bc8c1 --- /dev/null +++ b/easy_rec/python/compat/array_ops.py @@ -0,0 +1,229 @@ +import numpy as np +import tensorflow as tf +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import gen_math_ops + + +def convert_to_int_tensor(tensor, name, dtype=tf.int32): + """Converts the given value to an integer Tensor.""" + tensor = ops.convert_to_tensor(tensor, name=name, preferred_dtype=dtype) + if tensor.dtype.is_integer: + tensor = gen_math_ops.cast(tensor, dtype) + else: + raise TypeError('%s must be an integer tensor; dtype=%s' % + (name, tensor.dtype)) + return tensor + + +def _with_nonzero_rank(data): + """If `data` is scalar, then add a 
dimension; otherwise return as-is.""" + if data.shape.ndims is not None: + if data.shape.ndims == 0: + return tf.stack([data]) + else: + return data + else: + data_shape = tf.shape(data) + data_ndims = tf.rank(data) + return tf.reshape(data, tf.concat([[1], data_shape], axis=0)[-data_ndims:]) + + +def get_positive_axis(axis, ndims): + """Validate an `axis` parameter, and normalize it to be positive. + + If `ndims` is known (i.e., not `None`), then check that `axis` is in the + range `-ndims <= axis < ndims`, and return `axis` (if `axis >= 0`) or + `axis + ndims` (otherwise). + If `ndims` is not known, and `axis` is positive, then return it as-is. + If `ndims` is not known, and `axis` is negative, then report an error. + + Args: + axis: An integer constant + ndims: An integer constant, or `None` + + Returns: + The normalized `axis` value. + + Raises: + ValueError: If `axis` is out-of-bounds, or if `axis` is negative and + `ndims is None`. + """ + if not isinstance(axis, int): + raise TypeError('axis must be an int; got %s' % type(axis).__name__) + if ndims is not None: + if 0 <= axis < ndims: + return axis + elif -ndims <= axis < 0: + return axis + ndims + else: + raise ValueError('axis=%s out of bounds: expected %s<=axis<%s' % + (axis, -ndims, ndims)) + elif axis < 0: + raise ValueError('axis may only be negative if ndims is statically known.') + return axis + + +def tile_one_dimension(data, axis, multiple): + """Tiles a single dimension of a tensor.""" + # Assumes axis is a nonnegative int. + if data.shape.ndims is not None: + multiples = [1] * data.shape.ndims + multiples[axis] = multiple + else: + ones_value = tf.ones(tf.rank(data), tf.int32) + multiples = tf.concat( + [ones_value[:axis], [multiple], ones_value[axis + 1:]], axis=0) + return tf.tile(data, multiples) + + +def _all_dimensions(x): + """Returns a 1D-tensor listing all dimensions in x.""" + # Fast path: avoid creating Rank and Range ops if ndims is known. 
+ if isinstance(x, ops.Tensor) and x.get_shape().ndims is not None: + return constant_op.constant(np.arange(x.get_shape().ndims), dtype=tf.int32) + if (isinstance(x, sparse_tensor.SparseTensor) and + x.dense_shape.get_shape().is_fully_defined()): + r = x.dense_shape.get_shape().dims[0].value # sparse.dense_shape is 1-D. + return constant_op.constant(np.arange(r), dtype=tf.int32) + + # Otherwise, we rely on `range` and `rank` to do the right thing at runtime. + return gen_math_ops._range(0, tf.rank(x), 1) + + +# This op is intended to exactly match the semantics of numpy.repeat, with +# one exception: numpy.repeat has special (and somewhat non-intuitive) behavior +# when axis is not specified. Rather than implement that special behavior, we +# simply make `axis` be a required argument. +# +# External (OSS) `tf.repeat` feature request: +# https://github.com/tensorflow/tensorflow/issues/8246 +def repeat_with_axis(data, repeats, axis, name=None): + """Repeats elements of `data`. + + Args: + data: An `N`-dimensional tensor. + repeats: A 1-D integer tensor specifying how many times each element in + `axis` should be repeated. `len(repeats)` must equal `data.shape[axis]`. + Supports broadcasting from a scalar value. + axis: `int`. The axis along which to repeat values. Must be less than + `max(N, 1)`. + name: A name for the operation. + + Returns: + A tensor with `max(N, 1)` dimensions. Has the same shape as `data`, + except that dimension `axis` has size `sum(repeats)`. 
+ #### Examples: + ```python + >>> repeat(['a', 'b', 'c'], repeats=[3, 0, 2], axis=0) + ['a', 'a', 'a', 'c', 'c'] + >>> repeat([[1, 2], [3, 4]], repeats=[2, 3], axis=0) + [[1, 2], [1, 2], [3, 4], [3, 4], [3, 4]] + >>> repeat([[1, 2], [3, 4]], repeats=[2, 3], axis=1) + [[1, 1, 2, 2, 2], [3, 3, 4, 4, 4]] + ``` + """ + if not isinstance(axis, int): + raise TypeError('axis must be an int; got %s' % type(axis).__name__) + + with ops.name_scope(name, 'Repeat', [data, repeats]): + data = ops.convert_to_tensor(data, name='data') + repeats = convert_to_int_tensor(repeats, name='repeats') + repeats.shape.with_rank_at_most(1) + + # If `data` is a scalar, then upgrade it to a vector. + data = _with_nonzero_rank(data) + data_shape = tf.shape(data) + + # If `axis` is negative, then convert it to a positive value. + axis = get_positive_axis(axis, data.shape.ndims) + + # Check data Tensor shapes. + if repeats.shape.ndims == 1: + data.shape.dims[axis].assert_is_compatible_with(repeats.shape[0]) + + # If we know that `repeats` is a scalar, then we can just tile & reshape. + if repeats.shape.ndims == 0: + expanded = tf.expand_dims(data, axis + 1) + tiled = tile_one_dimension(expanded, axis + 1, repeats) + result_shape = tf.concat([data_shape[:axis], [-1], data_shape[axis + 1:]], + axis=0) + return tf.reshape(tiled, result_shape) + + # Broadcast the `repeats` tensor so rank(repeats) == axis + 1. + if repeats.shape.ndims != axis + 1: + repeats_shape = tf.shape(repeats) + repeats_ndims = tf.rank(repeats) + broadcast_shape = tf.concat( + [data_shape[:axis + 1 - repeats_ndims], repeats_shape], axis=0) + repeats = tf.broadcast_to(repeats, broadcast_shape) + repeats.set_shape([None] * (axis + 1)) + + # Create a "sequence mask" based on `repeats`, where slices across `axis` + # contain one `True` value for each repetition. E.g., if + # `repeats = [3, 1, 2]`, then `mask = [[1, 1, 1], [1, 0, 0], [1, 1, 0]]`. 
+ max_repeat = gen_math_ops.maximum( + 0, gen_math_ops._max(repeats, _all_dimensions(repeats))) + mask = tf.sequence_mask(repeats, max_repeat) + + # Add a new dimension around each value that needs to be repeated, and + # then tile that new dimension to match the maximum number of repetitions. + expanded = tf.expand_dims(data, axis + 1) + tiled = tile_one_dimension(expanded, axis + 1, max_repeat) + + # Use `boolean_mask` to discard the extra repeated values. This also + # flattens all dimensions up through `axis`. + masked = tf.boolean_mask(tiled, mask) + + # Reshape the output tensor to add the outer dimensions back. + if axis == 0: + result = masked + else: + result_shape = tf.concat([data_shape[:axis], [-1], data_shape[axis + 1:]], + axis=0) + result = tf.reshape(masked, result_shape) + + # Preserve shape information. + if data.shape.ndims is not None: + new_axis_size = 0 if repeats.shape[0] == 0 else None + result.set_shape(data.shape[:axis].concatenate( + [new_axis_size]).concatenate(data.shape[axis + 1:])) + + return result + + +def repeat(input, repeats, axis=None, name=None): # pylint: disable=redefined-builtin + """Repeat elements of `input`. + + Args: + input: An `N`-dimensional Tensor. + repeats: An 1-D `int` Tensor. The number of repetitions for each element. + repeats is broadcasted to fit the shape of the given axis. `len(repeats)` + must equal `input.shape[axis]` if axis is not None. + axis: An int. The axis along which to repeat values. By default (axis=None), + use the flattened input array, and return a flat output array. + name: A name for the operation. + + Returns: + A Tensor which has the same shape as `input`, except along the given axis. + If axis is None then the output array is flattened to match the flattened + input array. 
+ #### Examples: + ```python + >>> repeat(['a', 'b', 'c'], repeats=[3, 0, 2], axis=0) + ['a', 'a', 'a', 'c', 'c'] + >>> repeat([[1, 2], [3, 4]], repeats=[2, 3], axis=0) + [[1, 2], [1, 2], [3, 4], [3, 4], [3, 4]] + >>> repeat([[1, 2], [3, 4]], repeats=[2, 3], axis=1) + [[1, 1, 2, 2, 2], [3, 3, 4, 4, 4]] + >>> repeat(3, repeats=4) + [3, 3, 3, 3] + >>> repeat([[1,2], [3,4]], repeats=2) + [1, 1, 2, 2, 3, 3, 4, 4] + ``` + """ + if axis is None: + input = tf.reshape(input, [-1]) + axis = 0 + return repeat_with_axis(input, repeats, axis, name) diff --git a/easy_rec/python/compat/feature_column/feature_column.py b/easy_rec/python/compat/feature_column/feature_column.py index 1eb27717d..d446adb76 100644 --- a/easy_rec/python/compat/feature_column/feature_column.py +++ b/easy_rec/python/compat/feature_column/feature_column.py @@ -177,7 +177,8 @@ def _internal_input_layer(features, scope=None, cols_to_output_tensors=None, from_template=False, - feature_name_to_output_tensors=None): + feature_name_to_output_tensors=None, + sort_feature_columns_by_name=True): """See input_layer, `scope` is a name or variable scope to use.""" feature_columns = _normalize_feature_columns(feature_columns) for column in feature_columns: @@ -195,9 +196,11 @@ def _internal_input_layer(features, def _get_logits(): # pylint: disable=missing-docstring builder = _LazyBuilder(features) output_tensors = [] - ordered_columns = [] - for column in sorted(feature_columns, key=lambda x: x.name): - ordered_columns.append(column) + if sort_feature_columns_by_name: + ordered_columns = sorted(feature_columns, key=lambda x: x.name) + else: + ordered_columns = feature_columns + for column in ordered_columns: with variable_scope.variable_scope( None, default_name=column._var_scope_name): # pylint: disable=protected-access tensor = column._get_dense_tensor( # pylint: disable=protected-access @@ -239,7 +242,8 @@ def input_layer(features, trainable=True, cols_to_vars=None, cols_to_output_tensors=None, - 
feature_name_to_output_tensors=None): + feature_name_to_output_tensors=None, + sort_feature_columns_by_name=True): """Returns a dense `Tensor` as input layer based on given `feature_columns`. Generally a single example in training data is described with FeatureColumns. @@ -287,6 +291,7 @@ def input_layer(features, cols_to_output_tensors: If not `None`, must be a dictionary that will be filled with a mapping from '_FeatureColumn' to the associated output `Tensor`s. + sort_feature_columns_by_name: whether to sort feature columns Returns: A `Tensor` which represents input layer of a model. Its shape @@ -303,7 +308,8 @@ def input_layer(features, trainable=trainable, cols_to_vars=cols_to_vars, cols_to_output_tensors=cols_to_output_tensors, - feature_name_to_output_tensors=feature_name_to_output_tensors) + feature_name_to_output_tensors=feature_name_to_output_tensors, + sort_feature_columns_by_name=sort_feature_columns_by_name) # TODO(akshayka): InputLayer should be a subclass of Layer, and it @@ -2530,7 +2536,46 @@ def name(self): @property def raw_name(self): - return self.categorical_column.name + return self.categorical_column.raw_name + + @property + def cardinality(self): + from easy_rec.python.compat.feature_column.feature_column_v2 import HashedCategoricalColumn, \ + BucketizedColumn, WeightedCategoricalColumn, SequenceWeightedCategoricalColumn, \ + CrossedColumn, IdentityCategoricalColumn, VocabularyListCategoricalColumn, \ + VocabularyFileCategoricalColumn + + fc = self.categorical_column + if isinstance(fc, HashedCategoricalColumn) or isinstance(fc, CrossedColumn): + return fc.hash_bucket_size + + if isinstance(fc, IdentityCategoricalColumn): + return fc.num_buckets + + if isinstance(fc, BucketizedColumn): + return len(fc.boundaries) + 1 + + if isinstance(fc, VocabularyListCategoricalColumn): + return len(fc.vocabulary_list) + fc.num_oov_buckets + + if isinstance(fc, VocabularyFileCategoricalColumn): + return len(fc.vocabulary_size) + fc.num_oov_buckets + + if 
isinstance(fc, WeightedCategoricalColumn) or isinstance( + fc, SequenceWeightedCategoricalColumn): + sub_fc = fc.categorical_column + if isinstance(sub_fc, HashedCategoricalColumn) or isinstance( + sub_fc, CrossedColumn): + return sub_fc.hash_bucket_size + if isinstance(sub_fc, IdentityCategoricalColumn): + return sub_fc.num_buckets + if isinstance(sub_fc, VocabularyListCategoricalColumn): + return len(sub_fc.vocabulary_list) + fc.num_oov_buckets + if isinstance(sub_fc, VocabularyFileCategoricalColumn): + return len(sub_fc.vocabulary_size) + fc.num_oov_buckets + if isinstance(sub_fc, BucketizedColumn): + return len(sub_fc.boundaries) + 1 + return 1 @property def _var_scope_name(self): @@ -2605,7 +2650,7 @@ def _get_dense_tensor_internal(self, # get zero embedding import os if os.environ.get('tf.estimator.mode', '') != \ - os.environ.get('tf.estimator.ModeKeys.TRAIN', 'train'): + os.environ.get('tf.estimator.ModeKeys.TRAIN', 'train'): initializer = init_ops.zeros_initializer() else: initializer = self.initializer diff --git a/easy_rec/python/compat/feature_column/feature_column_v2.py b/easy_rec/python/compat/feature_column/feature_column_v2.py index e1e4d9304..0ca532bea 100644 --- a/easy_rec/python/compat/feature_column/feature_column_v2.py +++ b/easy_rec/python/compat/feature_column/feature_column_v2.py @@ -1328,6 +1328,71 @@ def numeric_column(key, normalizer_fn=normalizer_fn) +def constant_numeric_column(key, + shape=(1,), + default_value=None, + dtype=dtypes.float32, + feature_name=None): + """Represents real valued or numerical features. + + Example: + + ```python + price = constant_numeric_column('price') + columns = [price, ...] + features = tf.io.parse_example(..., features=make_parse_example_spec(columns)) + dense_tensor = input_layer(features, columns) + + # or + bucketized_price = bucketized_column(price, boundaries=[...]) + columns = [bucketized_price, ...] 
+ features = tf.io.parse_example(..., features=make_parse_example_spec(columns)) + linear_prediction = linear_model(features, columns) + ``` + + Args: + key: A unique string identifying the input feature. It is used as the + column name and the dictionary key for feature parsing configs, feature + `Tensor` objects, and feature columns. + shape: An iterable of integers specifies the shape of the `Tensor`. An + integer can be given which means a single dimension `Tensor` with given + width. The `Tensor` representing the column will have the shape of + [batch_size] + `shape`. + default_value: A single value compatible with `dtype` or an iterable of + values compatible with `dtype` which the column takes on during + `tf.Example` parsing if data is missing. A default value of `None` will + cause `tf.io.parse_example` to fail if an example does not contain this + column. If a single value is provided, the same value will be applied as + the default value for every item. If an iterable of values is provided, + the shape of the `default_value` should be equal to the given `shape`. + dtype: defines the type of values. Default value is `tf.float32`. Must be a + non-quantized, real integer or floating point type. + + Returns: + A `ConstantNumericColumn`. + + Raises: + TypeError: if any dimension in shape is not an int + ValueError: if any dimension in shape is not a positive integer + TypeError: if `default_value` is an iterable but not compatible with `shape` + TypeError: if `default_value` is not compatible with `dtype`. + ValueError: if `dtype` is not convertible to `tf.float32`. + """ + shape = _check_shape(shape, key) + if not (dtype.is_integer or dtype.is_floating): + raise ValueError('dtype must be convertible to float. 
' + 'dtype: {}, key: {}'.format(dtype, key)) + default_value = fc_utils.check_default_value(shape, default_value, dtype, key) + + fc_utils.assert_key_is_string(key) + return ConstantNumericColumn( + feature_name=feature_name, + key=key, + shape=shape, + default_value=default_value, + dtype=dtype) + + def bucketized_column(source_column, boundaries): """Represents discretized dense input. @@ -2619,6 +2684,131 @@ def _normalize_feature_columns(feature_columns): return sorted(feature_columns, key=lambda x: x.name) +class ConstantNumericColumn( + DenseColumn, + fc_old._DenseColumn, # pylint: disable=protected-access + collections.namedtuple( + 'ConstantNumericColumn', + ('feature_name', 'key', 'shape', 'default_value', 'dtype'))): + """see `numeric_column`.""" + + @property + def _is_v2_column(self): + return True + + @property + def name(self): + """See `FeatureColumn` base class.""" + return self.feature_name if self.feature_name else self.key + + @property + def raw_name(self): + """See `FeatureColumn` base class.""" + return self.key + + @property + def parse_example_spec(self): + """See `FeatureColumn` base class.""" + return { + self.key: + parsing_ops.FixedLenFeature(self.shape, self.dtype, + self.default_value) + } + + @property + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _parse_example_spec(self): + return self.parse_example_spec + + def _transform_input_tensor(self, input_tensor): + shape = [1] + list(self.shape) + def_val = 0 if self.default_value is None else self.default_value + row = tf.constant(def_val, dtypes.float32, shape) + batch_size = tf.shape(input_tensor)[0] + return tf.tile(row, [batch_size, 1]) + + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _transform_feature(self, inputs): + input_tensor = inputs.get(self.key) + return self._transform_input_tensor(input_tensor) + + def transform_feature(self, transformation_cache, state_manager): + """See 
`FeatureColumn` base class. + + Args: + transformation_cache: A `FeatureTransformationCache` object to access + features. + state_manager: A `StateManager` to create / access resources such as + lookup tables. + + Returns: + Normalized input tensor. + + Raises: + ValueError: If a SparseTensor is passed in. + """ + input_tensor = transformation_cache.get(self.key, state_manager) + return self._transform_input_tensor(input_tensor) + + @property + def variable_shape(self): + """See `DenseColumn` base class.""" + return tensor_shape.TensorShape(self.shape) + + @property + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _variable_shape(self): + return self.variable_shape + + def get_dense_tensor(self, transformation_cache, state_manager): + """Returns dense `Tensor` representing numeric feature. + + Args: + transformation_cache: A `FeatureTransformationCache` object to access + features. + state_manager: A `StateManager` to create / access resources such as + lookup tables. + + Returns: + Dense `Tensor` created within `transform_feature`. + """ + # Feature has been already transformed. Return the intermediate + # representation created by _transform_feature. 
+ return transformation_cache.get(self, state_manager) + + @deprecation.deprecated(_FEATURE_COLUMN_DEPRECATION_DATE, + _FEATURE_COLUMN_DEPRECATION) + def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None): + del weight_collections + del trainable + return inputs.get(self) + + @property + def parents(self): + """See 'FeatureColumn` base class.""" + return [self.key] + + def _get_config(self): + """See 'FeatureColumn` base class.""" + config = dict(zip(self._fields, self)) + config['normalizer_fn'] = utils.serialize_keras_object(self.normalizer_fn) + config['dtype'] = self.dtype.name + return config + + @classmethod + def _from_config(cls, config, custom_objects=None, columns_by_name=None): + """See 'FeatureColumn` base class.""" + _check_config_keys(config, cls._fields) + kwargs = config.copy() + kwargs['normalizer_fn'] = utils.deserialize_keras_object( + config['normalizer_fn'], custom_objects=custom_objects) + kwargs['dtype'] = dtypes.as_dtype(config['dtype']) + return cls(**kwargs) + + class NumericColumn( DenseColumn, fc_old._DenseColumn, # pylint: disable=protected-access @@ -3377,6 +3567,40 @@ def raw_name(self): """See `FeatureColumn` base class.""" return self.categorical_column.raw_name + @property + def cardinality(self): + fc = self.categorical_column + if isinstance(fc, HashedCategoricalColumn) or isinstance(fc, CrossedColumn): + return fc.hash_bucket_size + + if isinstance(fc, IdentityCategoricalColumn): + return fc.num_buckets + + if isinstance(fc, BucketizedColumn): + return len(fc.boundaries) + 1 + + if isinstance(fc, VocabularyListCategoricalColumn): + return len(fc.vocabulary_list) + fc.num_oov_buckets + + if isinstance(fc, VocabularyFileCategoricalColumn): + return len(fc.vocabulary_size) + fc.num_oov_buckets + + if isinstance(fc, WeightedCategoricalColumn) or isinstance( + fc, SequenceWeightedCategoricalColumn): + sub_fc = fc.categorical_column + if isinstance(sub_fc, HashedCategoricalColumn) or isinstance( + sub_fc, 
CrossedColumn): + return sub_fc.hash_bucket_size + if isinstance(sub_fc, IdentityCategoricalColumn): + return sub_fc.num_buckets + if isinstance(sub_fc, VocabularyListCategoricalColumn): + return len(sub_fc.vocabulary_list) + fc.num_oov_buckets + if isinstance(sub_fc, VocabularyFileCategoricalColumn): + return len(sub_fc.vocabulary_size) + fc.num_oov_buckets + if isinstance(sub_fc, BucketizedColumn): + return len(sub_fc.boundaries) + 1 + return 1 + @property def parse_example_spec(self): """See `FeatureColumn` base class.""" @@ -3727,6 +3951,40 @@ def raw_name(self): """See `FeatureColumn` base class.""" return self.categorical_column.raw_name + @property + def cardinality(self): + fc = self.categorical_column + if isinstance(fc, HashedCategoricalColumn) or isinstance(fc, CrossedColumn): + return fc.hash_bucket_size + + if isinstance(fc, IdentityCategoricalColumn): + return fc.num_buckets + + if isinstance(fc, BucketizedColumn): + return len(fc.boundaries) + 1 + + if isinstance(fc, VocabularyListCategoricalColumn): + return len(fc.vocabulary_list) + fc.num_oov_buckets + + if isinstance(fc, VocabularyFileCategoricalColumn): + return len(fc.vocabulary_size) + fc.num_oov_buckets + + if isinstance(fc, WeightedCategoricalColumn) or isinstance( + fc, SequenceWeightedCategoricalColumn): + sub_fc = fc.categorical_column + if isinstance(sub_fc, HashedCategoricalColumn) or isinstance( + sub_fc, CrossedColumn): + return sub_fc.hash_bucket_size + if isinstance(sub_fc, IdentityCategoricalColumn): + return sub_fc.num_buckets + if isinstance(sub_fc, VocabularyListCategoricalColumn): + return len(sub_fc.vocabulary_list) + fc.num_oov_buckets + if isinstance(sub_fc, VocabularyFileCategoricalColumn): + return len(sub_fc.vocabulary_size) + fc.num_oov_buckets + if isinstance(sub_fc, BucketizedColumn): + return len(sub_fc.boundaries) + 1 + return 1 + @property def parse_example_spec(self): """See `FeatureColumn` base class.""" @@ -5193,3 +5451,13 @@ def 
deserialize_feature_columns(configs, custom_objects=None): deserialize_feature_column(c, custom_objects, columns_by_name) for c in configs ] + + +def is_embedding_column(fc): + if isinstance(fc, EmbeddingColumn): + return True + if isinstance(fc, fc_old._SharedEmbeddingColumn): + return True + if isinstance(fc, SharedEmbeddingColumn): + return True + return False diff --git a/easy_rec/python/compat/sort_ops.py b/easy_rec/python/compat/sort_ops.py new file mode 100644 index 000000000..bd7f92ab1 --- /dev/null +++ b/easy_rec/python/compat/sort_ops.py @@ -0,0 +1,216 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Support for sorting tensors. + +@@argsort +@@sort +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops as framework_ops +from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops +from tensorflow.python.util.tf_export import tf_export + + +@tf_export('sort') +def sort(values, axis=-1, direction='ASCENDING', name=None): + """Sorts a tensor. 
+ + Usage: + + ```python + import tensorflow as tf + a = [1, 10, 26.9, 2.8, 166.32, 62.3] + b = tf.sort(a,axis=-1,direction='ASCENDING',name=None) + c = tf.keras.backend.eval(b) + # Here, c = [ 1. 2.8 10. 26.9 62.3 166.32] + ``` + + Args: + values: 1-D or higher numeric `Tensor`. + axis: The axis along which to sort. The default is -1, which sorts the last + axis. + direction: The direction in which to sort the values (`'ASCENDING'` or + `'DESCENDING'`). + name: Optional name for the operation. + + Returns: + A `Tensor` with the same dtype and shape as `values`, with the elements + sorted along the given `axis`. + + Raises: + ValueError: If axis is not a constant scalar, or the direction is invalid. + """ + with framework_ops.name_scope(name, 'sort'): + return _sort_or_argsort(values, axis, direction, return_argsort=False) + + +@tf_export('argsort') +def argsort(values, axis=-1, direction='ASCENDING', stable=False, name=None): + """Returns the indices of a tensor that give its sorted order along an axis. + + For a 1D tensor, `tf.gather(values, tf.argsort(values))` is equivalent to + `tf.sort(values)`. For higher dimensions, the output has the same shape as + `values`, but along the given axis, values represent the index of the sorted + element in that slice of the tensor at the given position. + + Usage: + + ```python + import tensorflow as tf + a = [1, 10, 26.9, 2.8, 166.32, 62.3] + b = tf.argsort(a,axis=-1,direction='ASCENDING',stable=False,name=None) + c = tf.keras.backend.eval(b) + # Here, c = [0 3 1 2 5 4] + ``` + + Args: + values: 1-D or higher numeric `Tensor`. + axis: The axis along which to sort. The default is -1, which sorts the last + axis. + direction: The direction in which to sort the values (`'ASCENDING'` or + `'DESCENDING'`). + stable: If True, equal elements in the original tensor will not be + re-ordered in the returned order. Unstable sort is not yet implemented, + but will eventually be the default for performance reasons. 
If you require + a stable order, pass `stable=True` for forwards compatibility. + name: Optional name for the operation. + + Returns: + An int32 `Tensor` with the same shape as `values`. The indices that would + sort each slice of the given `values` along the given `axis`. + + Raises: + ValueError: If axis is not a constant scalar, or the direction is invalid. + """ + del stable # Unused. + with framework_ops.name_scope(name, 'argsort'): + return _sort_or_argsort(values, axis, direction, return_argsort=True) + + +def _sort_or_argsort(values, axis, direction, return_argsort): + """Internal sort/argsort implementation. + + Args: + values: The input values. + axis: The axis along which to sort. + direction: 'ASCENDING' or 'DESCENDING'. + return_argsort: Whether to return the argsort result. + + Returns: + Either the sorted values, or the indices of the sorted values in the + original tensor. See the `sort` and `argsort` docstrings. + + Raises: + ValueError: If axis is not a constant scalar, or the direction is invalid. + """ + if direction not in _SORT_IMPL: + raise ValueError('%s should be one of %s' % + (direction, ', '.join(sorted(_SORT_IMPL.keys())))) + # Axis must be an integer, not a Tensor. + axis = framework_ops.convert_to_tensor(axis, name='axis') + axis_static = tensor_util.constant_value(axis) + if axis.shape.ndims != 0 or axis_static is None: + raise ValueError('axis must be a constant scalar') + axis_static = int(axis_static) # Avoids NumPy casting error + + values = framework_ops.convert_to_tensor(values, name='values') + + return _SORT_IMPL[direction](values, axis_static, return_argsort) + + +def _descending_sort(values, axis, return_argsort=False): + """Sorts values in reverse using `top_k`. + + Args: + values: Tensor of numeric values. + axis: Index of the axis which values should be sorted along. + return_argsort: If False, return the sorted values. If True, return the + indices that would sort the values. + + Returns: + The sorted values. 
+ """ + k = array_ops.shape(values)[axis] + rank = array_ops.rank(values) + static_rank = values.shape.ndims + # Fast path: sorting the last axis. + if axis == -1 or axis + 1 == values.get_shape().ndims: + top_k_input = values + transposition = None + else: + # Otherwise, transpose the array. Swap axes `axis` and `rank - 1`. + if axis < 0: + # Calculate the actual axis index if counting from the end. Use the static + # rank if available, or else make the axis back into a tensor. + axis += static_rank or rank + if static_rank is not None: + # Prefer to calculate the transposition array in NumPy and make it a + # constant. + transposition = constant_op.constant( + np.r_[ + # Axes up to axis are unchanged. + np.arange(axis), + # Swap axis and rank - 1. + [static_rank - 1], + # Axes in [axis + 1, rank - 1) are unchanged. + np.arange(axis + 1, static_rank - 1), + # Swap axis and rank - 1. + [axis]], + name='transposition') + else: + # Generate the transposition array from the tensors. + transposition = array_ops.concat( + [ + # Axes up to axis are unchanged. + math_ops.range(axis), + # Swap axis and rank - 1. + [rank - 1], + # Axes in [axis + 1, rank - 1) are unchanged. + math_ops.range(axis + 1, rank - 1), + # Swap axis and rank - 1. + [axis] + ], + axis=0) + top_k_input = array_ops.transpose(values, transposition) + + values, indices = nn_ops.top_k(top_k_input, k) + return_value = indices if return_argsort else values + if transposition is not None: + # transposition contains a single cycle of length 2 (swapping 2 elements), + # so it is an involution (it is its own inverse). + return_value = array_ops.transpose(return_value, transposition) + return return_value + + +def _ascending_sort(values, axis, return_argsort=False): + # Negate the values to get the ascending order from descending sort. + values_or_indices = _descending_sort(-values, axis, return_argsort) + # If not argsort, negate the values again. 
+ return values_or_indices if return_argsort else -values_or_indices + + +_SORT_IMPL = { + 'ASCENDING': _ascending_sort, + 'DESCENDING': _descending_sort, +} diff --git a/easy_rec/python/feature_column/feature_column.py b/easy_rec/python/feature_column/feature_column.py index 04fc07baf..1f62faef1 100644 --- a/easy_rec/python/feature_column/feature_column.py +++ b/easy_rec/python/feature_column/feature_column.py @@ -129,6 +129,8 @@ def _cmp_embed_config(a, b): self.parse_sequence_feature(config) elif config.feature_type == config.ExprFeature: self.parse_expr_feature(config) + elif config.feature_type == config.ConstFeature: + self.parse_const_feature(config) else: assert False, 'invalid feature type: %s' % config.feature_type except FeatureKeyError: @@ -331,10 +333,7 @@ def parse_tag_feature(self, config): default_value=0, feature_name=feature_name) - if len(config.input_names) > 1: - tag_fc = feature_column.weighted_categorical_column( - tag_fc, weight_feature_key=feature_name + '_w', dtype=tf.float32) - elif config.HasField('kv_separator'): + if len(config.input_names) > 1 or config.HasField('kv_separator'): tag_fc = feature_column.weighted_categorical_column( tag_fc, weight_feature_key=feature_name + '_w', dtype=tf.float32) @@ -400,9 +399,7 @@ def parse_raw_feature(self, config): self._deep_columns[feature_name] = fc def parse_expr_feature(self, config): - """Generate raw features columns. - - if boundaries is set, will be converted to category_column first. + """Generate expression features columns. 
def parse_const_feature(self, config):
  """Build a constant numeric column for a ConstFeature config.

  The column is named after `feature_name` when that field is set, otherwise
  after the first entry of `input_names`. Its width is `embedding_dim` when
  that field is set, otherwise `raw_input_dim`. The column is then stored in
  the wide and/or deep column dicts depending on `is_wide` / `is_deep`.

  Args:
    config: instance of easy_rec.python.protos.feature_config_pb2.FeatureConfig
  """
  if config.HasField('feature_name'):
    feature_name = config.feature_name
  else:
    feature_name = config.input_names[0]

  if config.HasField('embedding_dim'):
    dim = config.embedding_dim
  else:
    dim = config.raw_input_dim

  const_column = feature_column.constant_numeric_column(
      feature_name, shape=(dim,), feature_name=feature_name)

  # The same column object is shared by both towers when both are enabled.
  if self.is_wide(config):
    self._wide_columns[feature_name] = const_column
  if self.is_deep(config):
    self._deep_columns[feature_name] = const_column
def assign(input_tensor, position=None, value=None):
  """Write `value` into `input_tensor` at `position` and return the array.

  Used as the python callback of `tf.py_func` in `item_mask`. The array is
  modified in place and the very same object is returned.
  """
  index = tuple(position)
  input_tensor[index] = value
  return input_tensor


def item_mask(aug_data, length, gamma=0.3):
  """Overwrite floor(length * gamma) randomly chosen rows with 0.

  Returns:
    (masked sequence, length); `length` is passed through unchanged.
  """
  mask_count = tf.cast(
      tf.math.floor(tf.cast(length, dtype=tf.float32) * gamma), dtype=tf.int32)
  candidates = tf.range(length, dtype=tf.int32)
  chosen = tf.random.shuffle(candidates)[:mask_count]
  # The chosen indices are wrapped in a list so that `assign` receives them
  # as a one-element index tuple.
  masked = tf.py_func(assign, inp=[aug_data, [chosen], 0], Tout=aug_data.dtype)
  return masked, length


def item_crop(aug_data, length, eta=0.6):
  """Keep a random contiguous window of floor(length * eta) rows.

  The kept rows come first in the output and the remainder is filled with
  zeros taken from a zero tensor of the same leading size.

  Returns:
    (cropped sequence, number of kept rows).
  """
  float_len = tf.cast(length, dtype=tf.float32)
  max_length = tf.cast(get_shape_list(aug_data)[0], dtype=tf.int32)
  embedding_size = get_shape_list(aug_data)[1]

  keep_count = tf.cast(tf.math.floor(float_len * eta), dtype=tf.int32)
  start = tf.random.uniform([1],
                            minval=0,
                            maxval=length - keep_count,
                            dtype=tf.int32)[0]
  padding = tf.zeros([get_shape_list(aug_data)[0], embedding_size])

  window_fits = start + keep_count < max_length
  window_then_pad = tf.concat(
      [aug_data[start:start + keep_count], padding[:max_length - keep_count]],
      axis=0)
  tail_then_pad = tf.concat([aug_data[start:], padding[:start]], axis=0)
  cropped = tf.where(window_fits, window_then_pad, tail_then_pad)
  return cropped, keep_count


def item_reorder(aug_data, length, beta=0.6):
  """Shuffle the target rows of a random window of floor(length * beta) rows.

  Rows outside the window keep their position; rows inside the window are
  scattered to a shuffled permutation of the window's indices.

  Returns:
    (reordered sequence, length); `length` is passed through unchanged.
  """
  float_len = tf.cast(length, dtype=tf.float32)
  span = tf.cast(tf.math.floor(float_len * beta), dtype=tf.int32)
  start = tf.random.uniform([1], minval=0, maxval=length - span,
                            dtype=tf.int32)[0]
  shuffled = tf.random.shuffle(tf.range(start, start + span))

  all_rows = tf.range(get_shape_list(aug_data)[0])
  head = tf.slice(all_rows, [0], [start])
  tail = tf.slice(all_rows, [start + span], [-1])
  target_rows = tf.concat([head, shuffled, tail], axis=0)

  reordered = tf.scatter_nd(
      tf.expand_dims(target_rows, axis=1), aug_data, tf.shape(aug_data))
  return reordered, length


def augment(x):
  """Apply one randomly picked augmentation (crop, mask or reorder).

  Args:
    x: a (sequence, length) pair.

  Returns:
    [augmented sequence, augmented length].
  """
  seq, length = x
  choice = tf.random.shuffle(tf.range(3, dtype=tf.int32))[:1][0]

  def _crop():
    return item_crop(seq, length)

  def _mask():
    return item_mask(seq, length)

  def _reorder():
    return item_reorder(seq, length)

  def _mask_or_reorder():
    return tf.cond(tf.equal(choice, 1), _mask, _reorder)

  # 0 -> crop, 1 -> mask, anything else -> reorder.
  aug_seq, aug_len = tf.cond(tf.equal(choice, 0), _crop, _mask_or_reorder)
  return [aug_seq, aug_len]


def input_aug_data(original_data, seq_len):
  """Produce two independently augmented views of a batch of sequences.

  Returns:
    (view 1, view 2, lengths of view 1, lengths of view 2); both views are
    reshaped to the dynamic shape of `original_data`.
  """
  print('seq_len:', seq_len)
  lengths = tf.cast(seq_len, dtype=tf.int32)
  # Both map_fn calls are issued before any reshape, one after the other.
  views = [
      tf.map_fn(
          augment, elems=(original_data, lengths), dtype=[tf.float32, tf.int32])
      for _ in range(2)
  ]
  (seq_a, len_a), (seq_b, len_b) = views
  seq_a = tf.reshape(seq_a, tf.shape(original_data))
  seq_b = tf.reshape(seq_b, tf.shape(original_data))
  return seq_a, seq_b, len_a, len_b
def get_erase_features(self):
  """Return the input names that are used by ConstFeature configs only.

  Every input name that is also consumed by a non-const feature config is
  dropped from `self._const_features`. The set is updated in place and the
  same set object is returned.
  """
  const_only = self._const_features
  if not const_only:
    return const_only

  for feature_cfg in self._feature_configs:
    if feature_cfg.feature_type != feature_cfg.ConstFeature:
      # An input shared with a regular feature must stay a real input.
      const_only.difference_update(feature_cfg.input_names)
  return const_only
@@ -252,6 +271,7 @@ def create_multi_placeholders(self, export_config): self._input_fields[fid] != sample_weight_field ] + erase_features = self.get_erase_features() inputs = {} for fid in effective_fids: input_name = self._input_fields[fid] @@ -265,12 +285,25 @@ def create_multi_placeholders(self, export_config): tf_type = self._multi_value_types[input_name] logging.info('multi value input_name: %s, dtype: %s' % (input_name, tf_type)) - finput = tf.placeholder(tf_type, [None, None], name=placeholder_name) + if input_name in erase_features: + conf_type = get_config_type(tf_type) + def_val = self.get_type_defaults(conf_type, + self._input_field_defaults[fid]) + finput = tf.placeholder_with_default([def_val], [None, None], + name=placeholder_name) + else: + finput = tf.placeholder(tf_type, [None, None], name=placeholder_name) else: ftype = self._input_field_types[fid] tf_type = get_tf_type(ftype) logging.info('input_name: %s, dtype: %s' % (input_name, tf_type)) - finput = tf.placeholder(tf_type, [None], name=placeholder_name) + if input_name in erase_features: + def_val = self.get_type_defaults(ftype, + self._input_field_defaults[fid]) + finput = tf.placeholder_with_default([def_val], [None], + name=placeholder_name) + else: + finput = tf.placeholder(tf_type, [None], name=placeholder_name) inputs[input_name] = finput features = {x: inputs[x] for x in inputs} features = self._preprocess(features) @@ -302,11 +335,15 @@ def create_placeholders(self, export_config): len(effective_fids)) input_vals = tf.reshape( input_vals, [-1, len(effective_fids)], name='input_reshape') + + erase_features = self.get_erase_features() features = {} for tmp_id, fid in enumerate(effective_fids): ftype = self._input_field_types[fid] tf_type = get_tf_type(ftype) input_name = self._input_fields[fid] + if input_name in erase_features: + continue if tf_type in [tf.float32, tf.double, tf.int32, tf.int64]: features[input_name] = tf.string_to_number( input_vals[:, tmp_id], @@ -472,6 +509,21 @@ def 
def _parse_const_feature(self, fc, parsed_dict, field_dict, batch_size):
  """Store a const feature in `parsed_dict`, tiled to `batch_size` if needed.

  The tensor of the first input name is used as is when its leading dimension
  already equals `batch_size`; otherwise it is tiled `batch_size` times along
  the leading dimension. The result is stored under `feature_name` when that
  field is set, otherwise under the first input name.
  """
  first_input = fc.input_names[0]
  raw_tensor = field_dict[first_input]

  def _keep():
    return raw_tensor

  def _tile_to_batch():
    multiples = [1] * raw_tensor.shape.ndims
    multiples[0] = batch_size
    return tf.tile(raw_tensor, multiples)

  leading_matches = tf.equal(tf.shape(raw_tensor)[0], batch_size)
  batched_tensor = tf.cond(leading_matches, _keep, _tile_to_batch)

  if fc.HasField('feature_name'):
    feature_name = fc.feature_name
  else:
    feature_name = first_input
  parsed_dict[feature_name] = batched_tensor
class Package(object):
  """A sub DAG of tf ops for reuse."""
  # Registry of every constructed package, keyed by package name, so that a
  # block can refer to another package through `package_name`.
  __packages = {}

  def __init__(self, config, features, input_layer, l2_reg=None):
    """Validate the block configs and build the dependency DAG.

    Args:
      config: package config with `name`, `blocks` and `concat_blocks`.
      features: feature dict handed to the input layer.
      input_layer: object providing `has_group` and feature group lookup.
      l2_reg: optional regularizer forwarded to layer parameters.

    Raises:
      ValueError: a block has no input, or an input layer block has more
        than one input.
      KeyError: an input refers to an unknown feature group / block, or a
        block is named like a feature group.
    """
    self._config = config
    self._features = features
    self._input_layer = input_layer
    self._l2_reg = l2_reg
    self._dag = DAG()
    self._name_to_blocks = {}
    self.loss_dict = {}
    input_feature_groups = set()
    # First pass: register every block and validate explicit input layers.
    for block in config.blocks:
      if len(block.inputs) == 0:
        raise ValueError('block takes at least one input: %s' % block.name)
      self._dag.add_node(block.name)
      self._name_to_blocks[block.name] = block
      layer = block.WhichOneof('layer')
      if layer == 'input_layer':
        if len(block.inputs) != 1:
          raise ValueError('input layer `%s` takes only one input' % block.name)
        one_input = block.inputs[0]
        name = one_input.WhichOneof('name')
        if name != 'feature_group_name':
          raise KeyError(
              '`feature_group_name` should be set for input layer: ' +
              block.name)
        input_name = one_input.feature_group_name
        if not input_layer.has_group(input_name):
          raise KeyError('invalid feature group name: ' + input_name)
        if input_name in input_feature_groups:
          logging.warning('input `%s` already exists in other block' %
                          input_name)
        input_feature_groups.add(input_name)

    num_groups = len(input_feature_groups)
    num_blocks = len(self._name_to_blocks) - num_groups
    assert num_blocks > 0, 'there must be at least one block in backbone'

    # Second pass: connect blocks; feature groups that are referenced without
    # an explicit input layer block get an implicit one.
    num_pkg_input = 0
    for block in config.blocks:
      layer = block.WhichOneof('layer')
      if layer == 'input_layer':
        continue
      if block.name in input_feature_groups:
        raise KeyError('block name can not be one of feature groups:' +
                       block.name)
      for input_node in block.inputs:
        input_type = input_node.WhichOneof('name')
        if input_type == 'package_name':
          num_pkg_input += 1
          continue
        input_name = getattr(input_node, input_type)
        if input_name in self._name_to_blocks:
          assert input_name != block.name, 'input name can not equal to block name:' + input_name
          self._dag.add_edge(input_name, block.name)
        elif input_name not in input_feature_groups:
          if input_layer.has_group(input_name):
            logging.info('adding an input_layer block: ' + input_name)
            new_block = backbone_pb2.Block()
            new_block.name = input_name
            input_cfg = backbone_pb2.Input()
            input_cfg.feature_group_name = input_name
            new_block.inputs.append(input_cfg)
            new_block.input_layer.CopyFrom(backbone_pb2.InputLayer())
            self._name_to_blocks[input_name] = new_block
            self._dag.add_node(input_name)
            self._dag.add_edge(input_name, block.name)
            input_feature_groups.add(input_name)
          else:
            raise KeyError(
                'invalid input name `%s`, must be the name of either a feature group or an another block'
                % input_name)
    num_groups = len(input_feature_groups)
    assert num_pkg_input > 0 or num_groups > 0, 'there must be at least one input layer/feature group'

    if len(config.concat_blocks) == 0:
      leaf = self._dag.all_leaves()
      logging.warning(
          '%s has no `concat_blocks`, try to use all leaf blocks: %s' %
          (config.name, ','.join(leaf)))
      self._config.concat_blocks.extend(leaf)

    Package.__packages[self._config.name] = self

  def block_input(self, config, block_outputs, training=None):
    """Collect and merge the inputs of one block.

    Args:
      config: the block config whose `inputs` are resolved.
      block_outputs: dict of already computed block outputs by block name.
      training: training flag forwarded to referenced packages.

    Returns:
      The list of inputs when `merge_inputs_into_list` is set, otherwise the
      result of `merge_inputs`; `extra_input_fn` is applied last when set.

    Raises:
      KeyError: a referenced package or block output does not exist.
    """
    inputs = []
    for input_node in config.inputs:
      input_type = input_node.WhichOneof('name')
      input_name = getattr(input_node, input_type)
      if input_type == 'package_name':
        if input_name not in Package.__packages:
          raise KeyError('package name `%s` does not exists' % input_name)
        package = Package.__packages[input_name]
        input_feature = package(training)
        if len(package.loss_dict) > 0:
          self.loss_dict.update(package.loss_dict)
      elif input_name in block_outputs:
        input_feature = block_outputs[input_name]
      else:
        raise KeyError('input name `%s` does not exists' % input_name)

      # NOTE(review): `input_slice`, `input_fn` and `extra_input_fn` are
      # evaluated with eval(); the pipeline config must come from a trusted
      # source.
      if input_node.HasField('input_slice'):
        fn = eval('lambda x: x' + input_node.input_slice.strip())
        input_feature = fn(input_feature)
      if input_node.HasField('input_fn'):
        fn = eval(input_node.input_fn)
        input_feature = fn(input_feature)
      inputs.append(input_feature)

    if config.merge_inputs_into_list:
      output = inputs
    else:
      output = merge_inputs(inputs, config.input_concat_axis, config.name)

    if config.HasField('extra_input_fn'):
      fn = eval(config.extra_input_fn)
      output = fn(output)
    return output

  def __call__(self, is_training, **kwargs):
    """Run the package inside a reusable variable scope named after it."""
    with tf.variable_scope(self._config.name, reuse=tf.AUTO_REUSE):
      return self.call(is_training)

  def call(self, is_training):
    """Evaluate all blocks in topological order and merge `concat_blocks`.

    Raises:
      ValueError: a name in `concat_blocks` has no computed output.
    """
    block_outputs = {}
    blocks = self._dag.topological_sort()
    logging.info(self._config.name + ' topological order: ' + ','.join(blocks))
    print(self._config.name + ' topological order: ' + ','.join(blocks))
    for block in blocks:
      config = self._name_to_blocks[block]
      if config.layers:  # sequential layers
        logging.info('call sequential %d layers' % len(config.layers))
        output = self.block_input(config, block_outputs, is_training)
        for layer in config.layers:
          output = self.call_layer(output, layer, block, is_training)
        block_outputs[block] = output
        continue
      # just one of layer
      layer = config.WhichOneof('layer')
      if layer is None:  # identity layer
        block_outputs[block] = self.block_input(config, block_outputs,
                                                is_training)
      elif layer == 'input_layer':
        conf = config.input_layer
        input_fn = EnhancedInputLayer(conf, self._input_layer, self._features)
        feature_group = config.inputs[0].feature_group_name
        output = input_fn(feature_group, is_training)
        block_outputs[block] = output
      else:
        inputs = self.block_input(config, block_outputs, is_training)
        output = self.call_layer(inputs, config, block, is_training)
        block_outputs[block] = output

    outputs = []
    for output in self._config.concat_blocks:
      if output in block_outputs:
        temp = block_outputs[output]
        if type(temp) in (tuple, list):
          outputs.extend(temp)
        else:
          outputs.append(temp)
      else:
        raise ValueError('No output `%s` of backbone to be concat' % output)
    output = merge_inputs(outputs, msg='backbone')
    return output

  def call_keras_layer(self, layer_conf, inputs, name, training):
    """Instantiate the configured keras layer and apply it to `inputs`.

    Raises:
      ValueError: `class_name` cannot be resolved by `load_keras_layer`.
    """
    layer_cls, customize = load_keras_layer(layer_conf.class_name)
    if layer_cls is None:
      raise ValueError('Invalid keras layer class name: ' +
                       layer_conf.class_name)

    param_type = layer_conf.WhichOneof('params')
    if customize:
      if param_type is None or param_type == 'st_params':
        params = Parameter(layer_conf.st_params, True, l2_reg=self._l2_reg)
      else:
        pb_params = getattr(layer_conf, param_type)
        params = Parameter(pb_params, False, l2_reg=self._l2_reg)
      layer = layer_cls(params, name=name)
      kwargs = {'loss_dict': self.loss_dict}
      return layer(inputs, training=training, **kwargs)
    else:  # internal keras layer
      if param_type is None:
        layer = layer_cls(name=name)
      else:
        assert param_type == 'st_params', 'internal keras layer only support st_params'
        try:
          kwargs = convert_to_dict(layer_conf.st_params)
          logging.info('call %s layer with params %r' %
                       (layer_conf.class_name, kwargs))
          layer = layer_cls(name=name, **kwargs)
        except TypeError as e:
          logging.warning(e)
          # Materialize the values: on Python 3 `map` is lazy, so `%r` would
          # only log `<map object ...>`.
          args = [format_value(v) for v in layer_conf.st_params.values()]
          logging.info('try to call %s layer with params %r' %
                       (layer_conf.class_name, args))
          layer = layer_cls(*args, name=name)
      try:
        return layer(inputs, training=training)
      except TypeError:
        # Fallback for layers whose call does not accept `training`.
        return layer(inputs)

  def call_layer(self, inputs, config, name, training):
    """Dispatch on the `layer` oneof: keras_layer, lambda, repeat, recurrent.

    Raises:
      NotImplementedError: the oneof is unset or of an unsupported kind.
    """
    layer_name = config.WhichOneof('layer')
    if layer_name == 'keras_layer':
      return self.call_keras_layer(config.keras_layer, inputs, name, training)
    if layer_name == 'lambda':
      conf = getattr(config, 'lambda')
      # NOTE(review): eval() of a config expression; config must be trusted.
      fn = eval(conf.expression)
      return fn(inputs)
    if layer_name == 'repeat':
      conf = config.repeat
      n_loop = conf.num_repeat
      outputs = []
      for i in range(n_loop):
        name_i = '%s_%d' % (name, i)
        output = self.call_keras_layer(conf.keras_layer, inputs, name_i,
                                       training)
        outputs.append(output)
      if len(outputs) == 1:
        return outputs[0]
      if conf.HasField('output_concat_axis'):
        return tf.concat(outputs, conf.output_concat_axis)
      return outputs
    if layer_name == 'recurrent':
      conf = config.recurrent
      fixed_input_index = -1
      if conf.HasField('fixed_input_index'):
        fixed_input_index = conf.fixed_input_index
      if fixed_input_index >= 0:
        assert type(inputs) in (tuple,
                                list), '%s inputs must be a list' % name
        # Work on a private copy: the steps below assign to and delete from
        # this list, and `inputs` may be the very object cached as another
        # block's output (or a tuple, which cannot be assigned to).
        output = list(inputs)
      else:
        output = inputs
      for i in range(conf.num_steps):
        name_i = '%s_%d' % (name, i)
        layer = conf.keras_layer
        output_i = self.call_keras_layer(layer, output, name_i, training)
        if fixed_input_index >= 0:
          # Feed the step result back into every slot except the fixed one.
          j = 0
          for idx in range(len(output)):
            if idx == fixed_input_index:
              continue
            if type(output_i) in (tuple, list):
              output[idx] = output_i[j]
            else:
              output[idx] = output_i
            j += 1
        else:
          output = output_i
      if fixed_input_index >= 0:
        del output[fixed_input_index]
        if len(output) == 1:
          return output[0]
        return output
      return output

    # `layer_name` is None when the oneof is unset, so it is formatted rather
    # than concatenated.
    raise NotImplementedError('Unsupported backbone layer:' + str(layer_name))


class Backbone(object):
  """Configurable Backbone Network."""

  def __init__(self, config, features, input_layer, l2_reg=None):
    """Build all sub packages and the main `backbone` package."""
    self._config = config
    self._l2_reg = l2_reg
    self.loss_dict = {}
    # Constructing a Package registers it by name for later lookup.
    for pkg in config.packages:
      Package(pkg, features, input_layer, l2_reg)

    main_pkg = backbone_pb2.BlockPackage()
    main_pkg.name = 'backbone'
    main_pkg.blocks.MergeFrom(config.blocks)
    main_pkg.concat_blocks.extend(config.concat_blocks)
    self._main_pkg = Package(main_pkg, features, input_layer, l2_reg)

  def __call__(self, is_training, **kwargs):
    """Run the main package, then the optional `top_mlp`."""
    output = self._main_pkg(is_training, **kwargs)
    if len(self._main_pkg.loss_dict) > 0:
      self.loss_dict = self._main_pkg.loss_dict

    if self._config.HasField('top_mlp'):
      params = Parameter.make_from_pb(self._config.top_mlp)
      params.l2_regularizer = self._l2_reg
      final_mlp = MLP(params, name='backbone_top_mlp')
      output = final_mlp(output, training=is_training)
    return output


def merge_inputs(inputs, axis=-1, msg=''):
  """Merge block inputs into a single value.

  A single input is returned as is. If at least one input is a plain list,
  all inputs are flattened into one list (non-list inputs become one
  element each). Otherwise the inputs are concatenated along `axis`.

  Raises:
    ValueError: `inputs` is empty.
  """
  if len(inputs) == 0:
    raise ValueError('no inputs to be concat:' + msg)
  if len(inputs) == 1:
    return inputs[0]

  from functools import reduce
  # Exact type check on purpose: list subclasses are not treated as lists.
  is_list = [type(x) is list for x in inputs]
  if all(is_list):
    # merge multiple lists into a list
    return reduce(lambda x, y: x + y, inputs)

  if any(is_list):
    logging.warning('%s: try to merge inputs into list' % msg)
    return reduce(lambda x, y: x + y,
                  [e if type(e) is list else [e] for e in inputs])

  if axis != -1:
    logging.info('concat inputs %s axis=%d' % (msg, axis))
  return tf.concat(inputs, axis=axis)


def format_value(value):
  """Convert a protobuf Struct value into a plain python value.

  Integral floats become ints, text becomes `str`, a `ListValue` becomes a
  list of converted items and a `Struct` becomes a dict; anything else is
  returned unchanged.
  """
  value_type = type(value)
  if value_type == float:
    # is_integer() is False for inf/nan, which int() could not convert.
    return int(value) if value.is_integer() else value
  if value_type == six.text_type:
    return str(value)
  if value_type == struct_pb2.ListValue:
    # A real list, not a lazy `map` object, so it can be used as an argument
    # more than once and is logged readably.
    return [format_value(item) for item in value]
  if value_type == struct_pb2.Struct:
    return convert_to_dict(value)
  return value


def convert_to_dict(struct):
  """Convert a mapping of Struct values into a dict with `str` keys."""
  return {str(key): format_value(value) for key, value in struct.items()}
def layer_norm(input_tensor, name=None, reuse=None):
  """Run layer normalization on the last dimension of the tensor."""
  norm_kwargs = dict(
      inputs=input_tensor,
      begin_norm_axis=-1,
      begin_params_axis=-1,
      reuse=reuse,
      scope=name)
  return tf_layer_norm(**norm_kwargs)


class EnhancedInputLayer(object):
  """Enhance the raw input layer."""

  def __init__(self, config, input_layer, feature_dict):
    """Store the config and inputs; batch norm and layer norm are exclusive.

    Raises:
      ValueError: both `do_batch_norm` and `do_layer_norm` are set.
    """
    both_norms = config.do_batch_norm and config.do_layer_norm
    if both_norms:
      raise ValueError(
          'can not do batch norm and layer norm for input layer at the same time'
      )
    self._config = config
    self._input_layer = input_layer
    self._feature_dict = feature_dict

  def __call__(self, group, is_training, **kwargs):
    """Run `call` under a name scope derived from the group name."""
    scope_name = 'input_' + group
    with tf.name_scope(scope_name):
      return self.call(group, is_training)

  def call(self, group, is_training):
    """Fetch a feature group and apply the configured norm / dropout.

    Depending on the config the result is the sequence and target features,
    the per-feature list, a stacked tensor, a (tensor, list) pair, or the
    concatenated feature tensor.
    """
    cfg = self._config
    if cfg.output_seq_and_normal_feature:
      seq_features, _, target_features = self._input_layer(
          self._feature_dict, group, is_combine=False)
      return seq_features, target_features

    features, feature_list = self._input_layer(self._feature_dict, group)
    num_features = len(feature_list)

    use_ln = cfg.do_layer_norm
    use_bn = cfg.do_batch_norm
    drop_features = is_training and 0.0 < cfg.feature_dropout_rate < 1.0
    if drop_features:
      # One keep/drop decision per feature; the combined tensor is rebuilt
      # from the per-feature list further down.
      keep_prob = 1.0 - cfg.feature_dropout_rate
      mask = tf.distributions.Bernoulli(
          probs=keep_prob, dtype=tf.float32).sample(num_features)
    elif use_bn:
      features = tf.layers.batch_normalization(features, training=is_training)
    elif use_ln:
      features = layer_norm(features)

    use_dropout = 0.0 < cfg.dropout_rate < 1.0
    if drop_features or use_ln or use_bn or use_dropout:
      for idx, fea in enumerate(feature_list):
        if use_bn:
          fea = tf.layers.batch_normalization(fea, training=is_training)
        elif use_ln:
          fea = layer_norm(fea)
        if use_dropout:
          fea = tf.layers.dropout(fea, cfg.dropout_rate, training=is_training)
        if drop_features:
          fea = tf.div(fea, keep_prob) * mask[idx]
        feature_list[idx] = fea
      if drop_features:
        features = tf.concat(feature_list, axis=-1)

    if use_dropout and not drop_features:
      features = tf.layers.dropout(
          features, cfg.dropout_rate, training=is_training)

    if cfg.only_output_feature_list:
      return feature_list
    if cfg.only_output_3d_tensor:
      return tf.stack(feature_list, axis=1)
    if cfg.output_2d_tensor_and_feature_list:
      return features, feature_list
    return features
""" self._config = dnn_config self._l2_reg = l2_reg @@ -38,6 +40,7 @@ def __init__(self, self._config.activation, training=is_training) self._last_layer_no_activation = last_layer_no_activation self._last_layer_no_batch_norm = last_layer_no_batch_norm + self._reuse = reuse @property def hidden_units(self): @@ -59,14 +62,16 @@ def __call__(self, deep_fea, hidden_layer_feature_output=False): units=unit, kernel_regularizer=self._l2_reg, activation=None, - name='%s/dnn_%d' % (self._name, i)) + name='%s/dnn_%d' % (self._name, i), + reuse=self._reuse) if self._config.use_bn and ((i + 1 < hidden_units_len) or not self._last_layer_no_batch_norm): deep_fea = tf.layers.batch_normalization( deep_fea, training=self._is_training, trainable=True, - name='%s/dnn_%d/bn' % (self._name, i)) + name='%s/dnn_%d/bn' % (self._name, i), + reuse=self._reuse) if (i + 1 < hidden_units_len) or not self._last_layer_no_activation: deep_fea = self.activation( deep_fea, name='%s/dnn_%d/act' % (self._name, i)) diff --git a/easy_rec/python/layers/fscd_layer.py b/easy_rec/python/layers/fscd_layer.py new file mode 100644 index 000000000..daccf750e --- /dev/null +++ b/easy_rec/python/layers/fscd_layer.py @@ -0,0 +1,201 @@ +# -*- encoding: utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. 
def get_feature_complexity(feature_configs):
  """Map each feature name to the `complexity` value of its config.

  The name is `feature_name` when that field is set, otherwise the first
  entry of `input_names`.
  """
  feature_complexity = {}
  for config in feature_configs:
    name = config.input_names[0]
    if config.HasField('feature_name'):
      name = config.feature_name
    feature_complexity[name] = config.complexity
  return feature_complexity


def sigmoid(x):
  """Return the logistic function of the scalar `x`.

  Evaluated in a form that never exponentiates a large positive number, so
  very negative inputs yield 0.0 instead of raising OverflowError.
  """
  if x >= 0:
    return 1. / (1. + math.exp(-x))
  exp_x = math.exp(x)
  return exp_x / (1. + exp_x)


def get_feature_importance(pipeline_config, feature_group_name=None):
  """Read per-feature keep probabilities from the latest checkpoint.

  Args:
    pipeline_config: pipeline config whose model_config has
      `variational_dropout` set; `model_dir` locates the checkpoint.
    feature_group_name: when given, only this feature group is processed.

  Returns:
    An OrderedDict mapping feature name to importance. Groups without a
    `fscd_delta_<group>` tensor in the checkpoint get 1.0 for every feature;
    when a feature occurs more than once the largest probability is kept.

  Raises:
    ValueError: no checkpoint is found in `model_dir`.
  """
  assert pipeline_config.model_config.HasField(
      'variational_dropout'), 'variational_dropout must be in model_config'

  checkpoint_path = tf.train.latest_checkpoint(pipeline_config.model_dir)
  if checkpoint_path is None:
    # Fail with a clear message instead of a TypeError on `None + '.meta'`.
    raise ValueError('no checkpoint found in model_dir: %s' %
                     pipeline_config.model_dir)
  meta_graph_def = read_meta_graph_file(checkpoint_path + '.meta')

  # Each collection entry is a JSON dict: group name -> [[feature, dim], ...].
  features_map = dict()
  for col_def in meta_graph_def.collection_def[
      'variational_dropout'].bytes_list.value:
    features = json.loads(col_def)
    features_map.update(features)

  feature_importance = OrderedDict()
  tf.logging.info('Reading checkpoint from %s ...' % checkpoint_path)
  reader = tf.train.NewCheckpointReader(checkpoint_path)
  for feature_group in pipeline_config.model_config.feature_groups:
    group_name = feature_group.group_name
    if feature_group_name is not None and feature_group_name != group_name:
      continue
    if group_name not in features_map:
      # for now, sequence feature groups are not supported
      logging.warning('%s not in feature map' % group_name)
      continue

    feature_dims = features_map[group_name]

    delta_name = 'fscd_delta_%s' % group_name
    if not reader.has_tensor(delta_name):
      logging.warning(
          "feature group `%s` doesn't be involved in FSCD layer" % group_name)
      for feature, _ in feature_dims:
        feature_importance[feature] = 1.0
      continue

    delta = reader.get_tensor(delta_name)
    indices = argsort(delta, direction='DESCENDING')
    keep_prob = tf.nn.sigmoid(delta)
    with tf.Session() as sess:
      idx = indices.eval(session=sess)
      probs = keep_prob.eval(session=sess)
    # Visit features from the largest delta to the smallest.
    for i in idx:
      feature = feature_dims[i][0]
      if feature in feature_importance:
        raw = feature_importance[feature]
        if probs[i] > raw:
          # Probabilities are floats in (0, 1): `%d` would log them as 0.
          logging.info('%s importance change from %f to %f', feature, raw,
                       probs[i])
          feature_importance[feature] = probs[i]
      else:
        feature_importance[feature] = probs[i]
  return feature_importance
class FSCDLayer(object):
  """Rank features by variational dropout.

  paper: Towards a Better Tradeoff between Effectiveness and Efficiency in Pre-Ranking,
  A Learnable Feature Selection based Approach
  arXiv: 2105.07706
  """

  def __init__(self,
               feature_configs,
               variational_dropout_config,
               is_training=False,
               name=''):
    """Store the config and pre-compute the per-feature complexity table.

    Args:
      feature_configs: feature config messages, passed to
        `get_feature_complexity`.
      variational_dropout_config: config providing `max_keep_ratio`,
        `min_keep_ratio`, `temperature` and the three `feature_*_weight`
        fields read by this layer.
      is_training: whether the dropout mask is applied to the features.
      name: suffix of the `fscd_delta_<name>` variable and key of the
        `variational_dropout` collection entry.
    """
    self._config = variational_dropout_config
    self.is_training = is_training
    self.name = name
    self.feature_complexity = get_feature_complexity(feature_configs)

  def compute_dropout_mask(self, n):
    """Create the relaxed dropout mask for `n` features.

    Args:
      n: number of features, i.e. the length of the `fscd_delta` variable.

    Returns:
      A pair `(z, delta)`: `z` is the sampled soft mask and `delta` the
      clipped keep probability, both float32 tensors of shape [n].
    """
    delta_name = 'fscd_delta_%s' % self.name
    delta = tf.get_variable(
        name=delta_name,
        shape=[n],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.))
    delta = tf.nn.sigmoid(delta)

    # The graph runs in float32: `1 - eps(float64)` rounds to exactly 1.0
    # there, which would let `log(1 - delta)` reach -inf. Use the float32
    # epsilon for the upper bound so the clip really stays below 1.
    max_keep_ratio = min(self._config.max_keep_ratio,
                         1.0 - np.finfo(np.float32).eps)
    min_keep_ratio = self._config.min_keep_ratio
    if min_keep_ratio <= 0.0:
      min_keep_ratio = np.finfo(float).eps
    delta = tf.clip_by_value(delta, min_keep_ratio, max_keep_ratio)

    unif_noise = tf.random_uniform([n],
                                   dtype=tf.float32,
                                   seed=None,
                                   name='uniform_noise')
    approx = (
        tf.log(delta) - tf.log(1. - delta) + tf.log(unif_noise) -
        tf.log(1. - unif_noise))
    return tf.sigmoid(approx / self._config.temperature), delta

  def compute_regular_params(self, cols_to_feature):
    """Compute the regularization coefficient `alpha` of every feature column.

    Args:
      cols_to_feature: dict mapping feature column to its output tensor.

    Returns:
      dict mapping each feature column to a python float `alpha`.
    """
    embedding_types = (EmbeddingColumn, _SharedEmbeddingColumn,
                       SharedEmbeddingColumn)
    alphas = {}
    for fc, fea in cols_to_feature.items():
      dim = int(fea.shape[-1])
      complexity = self.feature_complexity[fc.raw_name]
      cardinal = fc.cardinality if isinstance(fc, embedding_types) else 1
      c = self._config.feature_complexity_weight * complexity
      c += self._config.feature_cardinality_weight * cardinal
      c += self._config.feature_dimension_weight * dim

      # alpha = log(sigmoid(c)) - log(1 - sigmoid(c)) is the logit of
      # sigmoid(c), i.e. exactly `c`. Using `c` directly avoids the math
      # domain error once sigmoid(c) saturates to 1.0 (or 0.0).
      alpha = float(c)
      # theta = 1 - sigmoid(c); only logged, computed without overflow.
      if c >= 0:
        exp_neg = math.exp(-c)
        theta = exp_neg / (1.0 + exp_neg)
      else:
        theta = 1.0 / (1.0 + math.exp(c))
      alphas[fc] = alpha
      logging.info(
          '%s complexity: %s cardinality: %s dimension: %s c: %s theta: %s '
          'alpha: %s', fc.raw_name, complexity, cardinal, dim, c, theta, alpha)
    return alphas

  def __call__(self, cols_to_feature):
    """Apply FSCD dropout to the features and register the regularization loss.

    Args:
      cols_to_feature: an ordered dict mapping feature_column to
        feature_values; updated in place with the (possibly masked) values.

    Returns:
      The per-column outputs concatenated along axis 1, with columns ordered
      by `column.name`.
    """
    z, delta = self.compute_dropout_mask(len(cols_to_feature))  # keep ratio
    tf.summary.histogram('fscd_keep_ratio', delta)
    tf.summary.histogram('fscd_keep_mask', z)
    regular = self.compute_regular_params(cols_to_feature)

    feature_dimension = []
    output_tensors = []
    alphas = []
    # position i in the sorted order indexes both `delta` and `z`
    for i, column in enumerate(sorted(cols_to_feature, key=lambda x: x.name)):
      value = cols_to_feature[column]
      if self.is_training:
        scaled_value = tf.div(value, delta[i])
        out = tf.multiply(scaled_value, z[i], name='fscd_dropout')
      else:
        out = value
      cols_to_feature[column] = out
      output_tensors.append(out)
      alphas.append(regular[column])
      feature_dimension.append((column.raw_name, int(value.shape[-1])))

    output_features = tf.concat(output_tensors, 1)
    tf.add_to_collection('variational_dropout',
                         json.dumps({self.name: feature_dimension}))

    batch_size = tf.shape(output_features)[0]
    t_alpha = tf.convert_to_tensor(alphas, dtype=tf.float32)
    loss = tf.reduce_sum(t_alpha * z) / tf.to_float(batch_size)

    tf.add_to_collection('variational_dropout_loss', loss)
    return output_features
self._feature_groups = { x.group_name: FeatureGroup(x) for x in feature_groups_config } @@ -62,6 +66,7 @@ def __init__(self, self._embedding_regularizer = embedding_regularizer self._kernel_regularizer = kernel_regularizer self._is_training = is_training + self._is_predicting = is_predicting self._variational_dropout_config = variational_dropout_config def has_group(self, group_name): @@ -92,8 +97,11 @@ def __call__(self, features, group_name, is_combine=True, is_dict=False): feature_name_to_output_tensors = {} negative_sampler = self._feature_groups[group_name]._config.negative_sampler if is_combine: - concat_features, group_features = self.single_call_input_layer( - features, group_name, feature_name_to_output_tensors) + place_on_cpu = os.getenv('place_embedding_on_cpu') + place_on_cpu = eval(place_on_cpu) if place_on_cpu else False + with conditional(self._is_predicting and place_on_cpu, ops.device('/CPU:0')): + concat_features, group_features = self.single_call_input_layer( + features, group_name, feature_name_to_output_tensors) if group_name in self._group_name_to_seq_features: # for target attention group_seq_arr = self._group_name_to_seq_features[group_name] @@ -116,19 +124,32 @@ def __call__(self, features, group_name, is_combine=True, is_dict=False): return concat_features, group_features else: # return sequence feature in raw format instead of combine them if self._variational_dropout_config is not None: - raise ValueError( + logging.warning( 'variational dropout is not supported in not combined mode now.') feature_group = self._feature_groups[group_name] group_columns, group_seq_columns = feature_group.select_columns( self._fc_parser) - assert len(group_columns) == 0, \ - 'there are none sequence columns: %s' % str(group_columns) + embedding_reg_lst = [] + output_features = None + group_features = [] + if group_columns: + cols_to_output_tensors = OrderedDict() + output_features = feature_column.input_layer( + features, + group_columns, + 
cols_to_output_tensors=cols_to_output_tensors, + feature_name_to_output_tensors=feature_name_to_output_tensors, + sort_feature_columns_by_name=False) + group_features = [cols_to_output_tensors[x] for x in group_columns] + + for col, val in cols_to_output_tensors.items(): + if is_embedding_column(col): + embedding_reg_lst.append(val) builder = feature_column._LazyBuilder(features) seq_features = [] - embedding_reg_lst = [] for fc in group_seq_columns: with variable_scope.variable_scope('input_layer/' + fc.categorical_column.name): @@ -140,7 +161,7 @@ def __call__(self, features, group_name, is_combine=True, is_dict=False): embedding_reg_lst.append(tmp_embedding) regularizers.apply_regularization( self._embedding_regularizer, weights_list=embedding_reg_lst) - return seq_features + return seq_features, output_features, group_features def single_call_input_layer(self, features, @@ -169,18 +190,17 @@ def single_call_input_layer(self, group_columns, cols_to_output_tensors=cols_to_output_tensors, feature_name_to_output_tensors=feature_name_to_output_tensors) - # embedding_reg_lst = [output_features] + embedding_reg_lst = [] - for col, val in cols_to_output_tensors.items(): - if isinstance(col, EmbeddingColumn) or isinstance(col, - SharedEmbeddingColumn): - embedding_reg_lst.append(val) builder = feature_column._LazyBuilder(features) seq_features = [] for column in sorted(group_seq_columns, key=lambda x: x.name): with variable_scope.variable_scope( None, default_name=column._var_scope_name): - seq_feature, seq_len = column._get_sequence_dense_tensor(builder) + place_on_cpu = os.getenv('place_embedding_on_cpu') + place_on_cpu = eval(place_on_cpu) if place_on_cpu else False + with conditional(self._is_predicting and place_on_cpu, ops.device('/CPU:0')): + seq_feature, seq_len = column._get_sequence_dense_tensor(builder) embedding_reg_lst.append(seq_feature) sequence_combiner = column.sequence_combiner @@ -213,30 +233,47 @@ def single_call_input_layer(self, 
cols_to_output_tensors[column] = cnn_feature else: raise NotImplementedError + if self._variational_dropout_config is not None: - features_dimension = OrderedDict([ - (k.raw_name, int(v.shape[-1])) - for k, v in cols_to_output_tensors.items() - ]) - concat_features = array_ops.concat( - [output_features] + seq_features, axis=-1) - variational_dropout = variational_dropout_layer.VariationalDropoutLayer( - self._variational_dropout_config, - features_dimension, - self._is_training, - name=group_name) - concat_features = variational_dropout(concat_features) - group_features = tf.split( - concat_features, list(features_dimension.values()), axis=-1) + if self._variational_dropout_config.regularize_by_feature_complexity: + fscd = FSCDLayer( + self._feature_configs, + self._variational_dropout_config, + is_training=self._is_training, + name=group_name) + output_features = fscd(cols_to_output_tensors) + concat_features = array_ops.concat( + [output_features] + seq_features, axis=-1) + group_features = [cols_to_output_tensors[x] for x in group_columns] + \ + [cols_to_output_tensors[x] for x in group_seq_columns] + else: + features_dimension = OrderedDict([ + (k.raw_name, int(v.shape[-1])) + for k, v in cols_to_output_tensors.items() + ]) + concat_features = array_ops.concat( + [output_features] + seq_features, axis=-1) + variational_dropout = variational_dropout_layer.VariationalDropoutLayer( + self._variational_dropout_config, + features_dimension, + self._is_training, + name=group_name) + concat_features = variational_dropout(concat_features) + group_features = tf.split( + concat_features, list(features_dimension.values()), axis=-1) else: concat_features = array_ops.concat( [output_features] + seq_features, axis=-1) group_features = [cols_to_output_tensors[x] for x in group_columns] + \ [cols_to_output_tensors[x] for x in group_seq_columns] - if embedding_reg_lst: - regularizers.apply_regularization( - self._embedding_regularizer, weights_list=embedding_reg_lst) + for fc, 
class MLP(tf.keras.layers.Layer):
  """Sequential multi-layer perceptron (MLP) block.

  Settings read from `params`:
    hidden_units: (required) sequence of layer sizes.
    use_bn: batch-normalize every layer except the last, default True.
    use_final_bn: batch-normalize the last layer, default True.
    use_bias: whether dense layers include a bias term, default True.
    dropout_ratio: per-layer dropout rates, default []; layers without an
      entry get no dropout.
    activation: activation of all layers except the last, default 'relu'.
    final_activation: activation of the last layer, default None.
    initializer: kernel initializer, default 'he_uniform'.
    use_bn_after_activation: apply batch norm after the activation instead
      of before it, default False.
    l2_regularizer: kernel regularizer handed to every dense layer.
  """

  def __init__(self, params, name='mlp', **kwargs):
    super(MLP, self).__init__(name=name, **kwargs)
    params.check_required('hidden_units')
    use_bn = params.get_or_default('use_bn', True)
    use_final_bn = params.get_or_default('use_final_bn', True)
    use_bias = params.get_or_default('use_bias', True)
    dropout_rate = list(params.get_or_default('dropout_ratio', []))
    activation = params.get_or_default('activation', 'relu')
    initializer = params.get_or_default('initializer', 'he_uniform')
    final_activation = params.get_or_default('final_activation', None)
    use_bn_after_act = params.get_or_default('use_bn_after_activation', False)
    units = list(params.hidden_units)
    logging.info(
        'MLP(%s) units: %s, dropout: %r, activate=%s, use_bn=%r, final_bn=%r,'
        ' final_activate=%s, bias=%r, initializer=%s, bn_after_activation=%r' %
        (name, units, dropout_rate, activation, use_bn, use_final_bn,
         final_activation, use_bias, initializer, use_bn_after_act))

    num_dropout = len(dropout_rate)
    self._sub_layers = []
    # hidden layers: all but the last entry of `units`
    for i, num_units in enumerate(units[:-1]):
      layer_name = 'dnn_%d' % i
      drop_rate = dropout_rate[i] if i < num_dropout else 0.0
      self.add_rich_layer(num_units, use_bn, drop_rate, activation, initializer,
                          use_bias, use_bn_after_act, layer_name,
                          params.l2_regularizer)

    # the last layer has its own batch-norm and activation settings
    n = len(units) - 1
    drop_rate = dropout_rate[n] if num_dropout > n else 0.0
    layer_name = 'dnn_%d' % n
    self.add_rich_layer(units[-1], use_final_bn, drop_rate, final_activation,
                        initializer, use_bias, use_bn_after_act, layer_name,
                        params.l2_regularizer)

  def add_rich_layer(self,
                     num_units,
                     use_bn,
                     dropout_rate,
                     activation,
                     initializer,
                     use_bias=True,
                     use_bn_after_activation=False,
                     name='mlp',
                     l2_reg=None):
    """Append one dense layer plus its optional batch norm / dropout.

    Raises:
      ValueError: if `dropout_rate` is >= 1.0.
    """

    def batch_norm(x, training):
      return tf.layers.batch_normalization(
          x, training=training, name='%s/%s/bn' % (self.name, name))

    act_fn = get_activation(activation)
    if use_bn and not use_bn_after_activation:
      # dense -> batch norm -> activation
      dense = tf.keras.layers.Dense(
          units=num_units,
          use_bias=use_bias,
          kernel_initializer=initializer,
          kernel_regularizer=l2_reg,
          name=name)
      self._sub_layers.append(dense)

      # bn = tf.keras.layers.BatchNormalization(name='%s/bn' % name)
      # keras BN layer have a stale issue on some versions of tf
      self._sub_layers.append(batch_norm)
      act = tf.keras.layers.Activation(act_fn, name='%s/act' % name)
      self._sub_layers.append(act)
    else:
      # dense with fused activation, optionally followed by batch norm
      dense = tf.keras.layers.Dense(
          num_units,
          activation=act_fn,
          use_bias=use_bias,
          kernel_initializer=initializer,
          kernel_regularizer=l2_reg,
          name=name)
      self._sub_layers.append(dense)
      if use_bn and use_bn_after_activation:
        self._sub_layers.append(batch_norm)

    if 0.0 < dropout_rate < 1.0:
      dropout = tf.keras.layers.Dropout(dropout_rate, name='%s/dropout' % name)
      self._sub_layers.append(dropout)
    elif dropout_rate >= 1.0:
      raise ValueError('invalid dropout_ratio: %.3f' % dropout_rate)

  def call(self, x, training=None, **kwargs):
    """Performs the forward computation of the block."""
    from inspect import isfunction
    for layer in self._sub_layers:
      if isfunction(layer):
        # the functional batch-norm closure created in `add_rich_layer`
        x = layer(x, training=training)
      else:
        cls = layer.__class__.__name__
        if cls in ('Dropout', 'BatchNormalization'):
          x = layer(x, training=training)
        else:
          x = layer(x)
    return x


class Highway(tf.keras.layers.Layer):
  """Keras wrapper around `common_layers.highway`.

  Settings read from `params`: `emb_size` (required), `num_layers`
  (default 1), `activation` (default 'gelu'), `dropout_rate` (default 0.0).
  """

  def __init__(self, params, name='highway', **kwargs):
    # `name` must be passed by keyword: the first positional parameter of
    # the keras Layer constructor is `trainable`.
    super(Highway, self).__init__(name=name, **kwargs)
    params.check_required('emb_size')
    self.emb_size = params.emb_size
    self.num_layers = params.get_or_default('num_layers', 1)
    self.activation = params.get_or_default('activation', 'gelu')
    self.dropout_rate = params.get_or_default('dropout_rate', 0.0)

  def call(self, inputs, training=None, **kwargs):
    """Run the highway network; dropout is only applied when training."""
    from easy_rec.python.layers.common_layers import highway
    return highway(
        inputs,
        self.emb_size,
        activation=self.activation,
        num_layers=self.num_layers,
        dropout=self.dropout_rate if training else 0.0)
class Gate(tf.keras.layers.Layer):
  """Weighted sum gate.

  One element of the input list (selected by `weight_index`, default 0)
  holds the weights; the j-th remaining input is scaled by column j of the
  weights and all scaled inputs are summed.
  """

  def __init__(self, params, name='gate', **kwargs):
    # `name` must be passed by keyword: the first positional parameter of
    # the keras Layer constructor is `trainable`.
    super(Gate, self).__init__(name=name, **kwargs)
    self.weight_index = params.get_or_default('weight_index', 0)

  def call(self, inputs, **kwargs):
    assert len(
        inputs
    ) > 1, 'input of Gate layer must be a list containing at least 2 elements'
    weights = inputs[self.weight_index]
    output = None
    j = 0  # column of `weights` belonging to the current non-weight input
    for i, x in enumerate(inputs):
      if i == self.weight_index:
        continue
      weighted = weights[:, j, None] * x
      output = weighted if output is None else output + weighted
      j += 1
    return output


class BST(Layer):
  """Transformer encoder over behavior sequences, optionally with a target item.

  `call` expects `inputs = (seq_features, target_features)`, where
  `seq_features` is a list of `(embedding, length)` pairs and
  `target_features` a possibly empty list of target item tensors.
  """

  def __init__(self, params, name='bst', l2_reg=None, **kwargs):
    super(BST, self).__init__(name=name, **kwargs)
    self.l2_reg = l2_reg
    self.config = params.get_pb_config()

  def encode(self, seq_input, max_position):
    """Add position embeddings, run the transformer, return position 0."""
    seq_fea = multihead_cross_attention.embedding_postprocessor(
        seq_input,
        position_embedding_name=self.name + '/position_embeddings',
        max_position_embeddings=max_position,
        reuse_position_embedding=tf.AUTO_REUSE)

    # positions whose embedding is all zeros are masked out
    n = tf.count_nonzero(seq_input, axis=-1)
    seq_mask = tf.cast(n > 0, tf.int32)

    attention_mask = multihead_cross_attention.create_attention_mask_from_input_mask(
        from_tensor=seq_fea, to_mask=seq_mask)

    hidden_act = get_activation(self.config.hidden_act)
    attention_fea = multihead_cross_attention.transformer_encoder(
        seq_fea,
        hidden_size=self.config.hidden_size,
        num_hidden_layers=self.config.num_hidden_layers,
        num_attention_heads=self.config.num_attention_heads,
        attention_mask=attention_mask,
        intermediate_size=self.config.intermediate_size,
        intermediate_act_fn=hidden_act,
        hidden_dropout_prob=self.config.hidden_dropout_prob,
        attention_probs_dropout_prob=self.config.attention_probs_dropout_prob,
        initializer_range=self.config.initializer_range,
        name=self.name + '/transformer',
        reuse=tf.AUTO_REUSE)
    # attention_fea shape: [batch_size, seq_length, hidden_size]
    out_fea = attention_fea[:, 0, :]  # target feature
    print('bst output shape:', out_fea.shape)
    return out_fea

  def call(self, inputs, training=None, **kwargs):
    """Encode the sequence; the target item, if any, is prepended.

    When `need_contrastive_learning` is set, `kwargs` must contain a
    `loss_dict` into which the contrastive loss is written.
    """
    seq_features, target_features = inputs
    assert len(seq_features) > 0, '[%s] sequence feature is empty' % self.name
    if not training:
      # NOTE: this overwrites the dropout settings on `self.config` itself
      self.config.hidden_dropout_prob = 0.0
      self.config.attention_probs_dropout_prob = 0.0

    seq_embeds = [seq_fea for seq_fea, _ in seq_features]

    max_position = self.config.max_position_embeddings
    # max_seq_len: the max sequence length in current mini-batch, all sequences are padded to this length
    batch_size, max_seq_len, _ = get_shape_list(seq_features[0][0], 3)
    valid_len = tf.assert_less_equal(
        max_seq_len,
        max_position,
        message='sequence length is greater than `max_position_embeddings`:' +
        str(max_position) + ' in feature group:' + self.name)
    with tf.control_dependencies([valid_len]):
      # seq_input: [batch_size, seq_len, embed_size]
      seq_input = tf.concat(seq_embeds, axis=-1)
    if len(target_features) > 0:
      # one extra position for the prepended target item
      max_position += 1

    seq_embed_size = seq_input.shape.as_list()[-1]
    if seq_embed_size != self.config.hidden_size:
      seq_input = tf.layers.dense(
          seq_input,
          self.config.hidden_size,
          activation=tf.nn.relu,
          kernel_regularizer=self.l2_reg)

    # seq_len: [batch_size, 1], the true length of each sequence
    seq_len = seq_features[0][1]

    if self.config.need_contrastive_learning:
      assert 'loss_dict' in kwargs, 'no `loss_dict` in kwargs of bst layer: %s' % self.name
      loss = self.contrastive_loss(seq_input, seq_len, max_position)
      if self.config.auto_contrastive_loss_weight:
        uncertainty = tf.Variable(
            0, name='%s_contrastive_loss_weight' % self.name, dtype=tf.float32)
        loss = tf.exp(-uncertainty) * loss + 0.5 * uncertainty
      else:
        loss *= self.config.contrastive_loss_weight
      loss_dict = kwargs['loss_dict']
      loss_dict['%s_contrastive_loss' % self.name] = loss
      # tf.summary.scalar('loss/%s_contrastive_loss' % self.name, loss)

    if len(target_features) > 0:
      target_feature = tf.concat(target_features, axis=-1)
      target_size = target_feature.shape.as_list()[-1]
      assert seq_embed_size == target_size, 'the embedding size of sequence and target item is not equal' \
                                            ' in feature group:' + self.name
      if target_size != self.config.hidden_size:
        target_feature = tf.layers.dense(
            target_feature,
            self.config.hidden_size,
            activation=tf.nn.relu,
            kernel_regularizer=self.l2_reg)
      # target_feature: [batch_size, 1, embed_size]
      target_feature = tf.expand_dims(target_feature, 1)
      # seq_input: [batch_size, seq_len+1, embed_size]
      seq_input = tf.concat([target_feature, seq_input], axis=1)

    return self.encode(seq_input, max_position)

  def contrastive_loss(self, seq_input, seq_len, max_position):
    """NCE loss between the encodings of two augmented views of the sequence."""
    aug_seq1, aug_seq2, aug_len1, aug_len2 = input_aug_data(seq_input, seq_len)
    seq_output1 = self.encode(aug_seq1, max_position)
    seq_output2 = self.encode(aug_seq2, max_position)
    loss = nce_loss(seq_output1, seq_output2)
    return loss
class DIN(Layer):
  """Target attention over behavior sequences.

  `call` expects `inputs = (seq_features, target_features)`, where
  `seq_features` is a non-empty list of `(embedding, length)` pairs and
  `target_features` a non-empty list of target item tensors.
  """

  def __init__(self, params, name='din', l2_reg=None, **kwargs):
    super(DIN, self).__init__(name=name, **kwargs)
    self.l2_reg = l2_reg
    self.config = params.get_pb_config()

  def call(self, inputs, training=None, **kwargs):
    """Return the attention-weighted sum of the sequence embeddings.

    The target item (query) is zero-padded up to the sequence embedding size
    when it is smaller; a larger query is rejected. When
    `need_target_feature` is set the query is appended to the output.
    """
    seq_features, target_features = inputs
    assert len(seq_features) > 0, '[%s] sequence feature is empty' % self.name
    assert len(target_features) > 0, '[%s] target feature is empty' % self.name

    query = tf.concat(target_features, axis=-1)
    keys = tf.concat([embedding for embedding, _ in seq_features], axis=-1)

    query_dim = int(query.shape[-1])
    key_dim = keys.shape.as_list()[-1]
    if query_dim != key_dim:
      logging.info(
          ' the embedding size of sequence [%d] and target item [%d] is not equal'
          ' in feature group: %s', key_dim, query_dim, self.name)
      if query_dim < key_dim:
        query = tf.pad(query, [[0, 0], [0, key_dim - query_dim]])
      else:
        assert False, 'the embedding size of target item is larger than the one of sequence'

    _, max_seq_len, _ = get_shape_list(keys, 3)
    tiled_query = tf.tile(tf.expand_dims(query, 1), [1, max_seq_len, 1])
    attention_input = tf.concat(
        [tiled_query, keys, tiled_query - keys, tiled_query * keys], axis=-1)
    attention_dnn = dnn.DNN(
        self.config.attention_dnn,
        self.l2_reg,
        self.name + '/din_attention',
        training,
        last_layer_no_activation=True,
        last_layer_no_batch_norm=True)
    logits = attention_dnn(attention_input)  # [B, L, 1]
    scores = tf.transpose(logits, [0, 2, 1])  # [B, 1, L]

    # replace the scores of padded positions by a very large negative value
    seq_len = seq_features[0][1]
    valid = tf.expand_dims(
        tf.sequence_mask(seq_len, max_seq_len, dtype=tf.bool), 1)
    scores = tf.where(valid, scores,
                      tf.ones_like(scores) * (-2**32 + 1))  # [B, 1, L]

    normalizer = self.config.attention_normalizer
    if normalizer == 'softmax':
      scores = tf.nn.softmax(scores)  # (B, 1, L)
    elif normalizer == 'sigmoid':
      scores = tf.nn.sigmoid(scores / (key_dim**0.5))
    else:
      raise ValueError('unsupported attention normalizer: ' + normalizer)

    if query_dim < key_dim:
      # only keep the leading part of the keys matching the original query size
      keys = keys[:, :, :query_dim]  # [B, L, E]
    output = tf.squeeze(tf.matmul(scores, keys), axis=[1])
    if self.config.need_target_feature:
      output = tf.concat([output, query], axis=-1)
    print('din output shape:', output.shape)
    return output
class SENet(tf.keras.layers.Layer):
  """SENET Layer used in FiBiNET.

  Input shape
    - A list of 2D tensor with shape: ``(batch_size,embedding_size)``.
      The ``embedding_size`` of each field can have different value.

  Output shape
    - A 2D tensor with shape: ``(batch_size,sum_of_embedding_size)``.

  References:
    1. [FiBiNET](https://arxiv.org/pdf/1905.09433.pdf)
      Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction
    2. [FiBiNet++](https://arxiv.org/pdf/2209.05016.pdf)
      Improving FiBiNet by Greatly Reducing Model Size for CTR Prediction
  """

  def __init__(self, params, name='SENet', **kwargs):
    # `name` must be passed by keyword: the first positional parameter of
    # the keras Layer constructor is `trainable`. `self.name` is used below
    # to name the dense layers, so it has to be the requested name.
    super(SENet, self).__init__(name=name, **kwargs)
    self.config = params.get_pb_config()

  def call(self, inputs, **kwargs):
    g = self.config.num_squeeze_group
    for emb in inputs:
      assert emb.shape.ndims == 2, 'field embeddings must be rank 2 tensors'
      dim = int(emb.shape[-1])
      assert dim >= g and dim % g == 0, 'field embedding dimension %d must be divisible by %d' % (
          dim, g)

    field_size = len(inputs)
    feature_size_list = [emb.shape.as_list()[-1] for emb in inputs]

    # Squeeze
    # the embedding dimension must be divisible by g
    group_embs = [
        tf.reshape(emb, [-1, g, int(emb.shape[-1]) // g]) for emb in inputs
    ]

    squeezed = []
    for emb in group_embs:
      squeezed.append(tf.reduce_max(emb, axis=-1))  # [B, g]
      squeezed.append(tf.reduce_mean(emb, axis=-1))  # [B, g]
    z = tf.concat(squeezed, axis=1)  # [bs, field_size * num_groups * 2]

    # Excitation
    r = self.config.reduction_ratio
    reduction_size = max(1, field_size * g * 2 // r)

    initializer = tf.glorot_normal_initializer()
    a1 = tf.layers.dense(
        z,
        reduction_size,
        kernel_initializer=initializer,
        activation=tf.nn.relu,
        name='%s/W1' % self.name)
    weights = tf.layers.dense(
        a1,
        sum(feature_size_list),
        kernel_initializer=initializer,
        name='%s/W2' % self.name)

    # Re-weight
    inputs = tf.concat(inputs, axis=-1)
    output = inputs * weights

    # Fuse, add skip-connection
    if self.config.use_skip_connection:
      output += inputs

    # Layer Normalization
    if self.config.use_output_layer_norm:
      output = layer_norm(output)
    return output


def _full_interaction(v_i, v_j):
  """Return the per-example dot product of `v_i` and `v_j`, shape [bs, 1]."""
  # [bs, 1, dim] x [bs, dim, 1] = [bs, 1, 1], squeezed to [bs, 1]
  interaction = tf.matmul(
      tf.expand_dims(v_i, axis=1), tf.expand_dims(v_j, axis=-1))
  return tf.squeeze(interaction, axis=1)
class BiLinear(tf.keras.layers.Layer):
  """BilinearInteraction Layer used in FiBiNET.

  Input shape
    - A list of 2D tensor with shape: ``(batch_size,embedding_size)``.
      Its length is ``field_size``.
      The ``embedding_size`` of each field can have different value.

  Output shape
    - 2D tensor with shape: ``(batch_size,output_size)``.

  Attributes:
    num_output_units: the number of output units
    type: ['all', 'each', 'interaction'], types of bilinear functions used in this layer
    use_plus: whether to use bi-linear+

  References:
    1. [FiBiNET](https://arxiv.org/pdf/1905.09433.pdf)
      Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction
    2. [FiBiNet++](https://arxiv.org/pdf/2209.05016.pdf)
      Improving FiBiNet by Greatly Reducing Model Size for CTR Prediction
  """

  def __init__(self, params, name='bilinear', **kwargs):
    # `name` must be passed by keyword: the first positional parameter of
    # the keras Layer constructor is `trainable`. `self.name` is used below
    # to name the dense layers, so it has to be the requested name.
    super(BiLinear, self).__init__(name=name, **kwargs)
    params.check_required(['num_output_units'])
    bilinear_plus = params.get_or_default('use_plus', True)
    self.bilinear_type = params.get_or_default('type', 'interaction').lower()
    self.output_size = params.num_output_units

    if self.bilinear_type not in ['all', 'each', 'interaction']:
      raise NotImplementedError(
          "bilinear_type only support: ['all', 'each', 'interaction']")

    if bilinear_plus:
      self.func = _full_interaction
    else:
      self.func = tf.multiply

  def call(self, inputs, **kwargs):
    embeddings = inputs
    logging.info('Bilinear Layer with %d inputs' % len(embeddings))
    if len(embeddings) > 200:
      logging.warning('There are too many inputs for bilinear layer: %d' %
                      len(embeddings))
    equal_dim = True
    _dim = embeddings[0].shape[-1]
    for emb in embeddings:
      assert emb.shape.ndims == 2, 'field embeddings must be rank 2 tensors'
      if emb.shape[-1] != _dim:
        equal_dim = False
    if not equal_dim and self.bilinear_type != 'interaction':
      raise ValueError(
          'all embedding dimensions must be same when not use bilinear type: interaction'
      )
    dim = int(_dim)

    field_size = len(embeddings)
    initializer = tf.glorot_normal_initializer()

    # bi-linear+: p has shape [bs, f*(f-1)/2]
    # bi-linear:
    #   when equal_dim=True, p has shape [bs, f*(f-1)/2*k], where k is the
    #   embedding size
    #   when equal_dim=False, p has shape
    #   [bs, (k_2+k_3+...+k_f)+...+(k_i+k_{i+1}+...+k_f)+...+k_f],
    #   where k_i is the embedding size of the i-th field
    if self.bilinear_type == 'all':
      # a single projection shared by all fields
      v_dot = [
          tf.layers.dense(
              v_i,
              dim,
              kernel_initializer=initializer,
              name='%s/all' % self.name,
              reuse=tf.AUTO_REUSE) for v_i in embeddings[:-1]
      ]
      p = [
          self.func(v_dot[i], embeddings[j])
          for i, j in itertools.combinations(range(field_size), 2)
      ]
    elif self.bilinear_type == 'each':
      # one projection per field
      v_dot = [
          tf.layers.dense(
              v_i,
              dim,
              kernel_initializer=initializer,
              name='%s/each_%d' % (self.name, i),
              reuse=tf.AUTO_REUSE) for i, v_i in enumerate(embeddings[:-1])
      ]
      p = [
          self.func(v_dot[i], embeddings[j])
          for i, j in itertools.combinations(range(field_size), 2)
      ]
    else:  # interaction
      # one projection per field pair, sized to the second field
      p = [
          self.func(
              tf.layers.dense(
                  embeddings[i],
                  embeddings[j].shape.as_list()[-1],
                  kernel_initializer=initializer,
                  name='%s/interaction_%d_%d' % (self.name, i, j),
                  reuse=tf.AUTO_REUSE), embeddings[j])
          for i, j in itertools.combinations(range(field_size), 2)
      ]

    output = tf.layers.dense(
        tf.concat(p, axis=-1), self.output_size, kernel_initializer=initializer)
    return output
class FiBiNet(tf.keras.layers.Layer):
  """FiBiNet++:Improving FiBiNet by Greatly Reducing Model Size for CTR Prediction.

  The SENet output and, when configured, the bilinear output are
  concatenated and optionally fed through a final MLP.

  References:
    - [FiBiNet++](https://arxiv.org/pdf/2209.05016.pdf)
      Improving FiBiNet by Greatly Reducing Model Size for CTR Prediction
  """

  def __init__(self, params, name='fibinet', **kwargs):
    # `name` must be passed by keyword: the first positional parameter of
    # the keras Layer constructor is `trainable`. `self.name` prefixes the
    # names of the sub-layers created in `call`.
    super(FiBiNet, self).__init__(name=name, **kwargs)
    self._config = params.get_pb_config()
    if self._config.HasField('mlp'):
      p = Parameter.make_from_pb(self._config.mlp)
      p.l2_regularizer = params.l2_regularizer
      self.final_mlp = MLP(p, name=name)
    else:
      self.final_mlp = None

  def call(self, inputs, training=None, **kwargs):
    feature_list = []

    params = Parameter.make_from_pb(self._config.senet)
    senet = SENet(params, name='%s/senet' % self.name)
    senet_output = senet(inputs)
    feature_list.append(senet_output)

    if self._config.HasField('bilinear'):
      params = Parameter.make_from_pb(self._config.bilinear)
      bilinear = BiLinear(params, name='%s/bilinear' % self.name)
      bilinear_output = bilinear(inputs)
      feature_list.append(bilinear_output)

    if len(feature_list) > 1:
      feature = tf.concat(feature_list, axis=-1)
    else:
      feature = feature_list[0]

    if self.final_mlp is not None:
      feature = self.final_mlp(feature, training=training)
    return feature
class DotInteraction(tf.keras.layers.Layer):
  """Dot interaction layer of the DLRM model.

  See theory in the DLRM paper: https://arxiv.org/pdf/1906.00091.pdf,
  section 2.1.3. Sparse activations and dense activations are combined.
  Dot interaction is applied to a batch of input Tensors [e1,...,e_k] of the
  same dimension and the output is a batch of Tensors with all distinct pairwise
  dot products of the form dot(e_i, e_j) for i <= j if self_interaction is
  True, otherwise dot(e_i, e_j) for i < j.

  Attributes:
    self_interaction: Boolean indicating if features should self-interact.
      If it is True, then the diagonal entries of the interaction matrix are
      also taken.
    skip_gather: An optimization flag. If it's set then the upper triangle part
      of the dot interaction matrix dot(e_i, e_j) is set to 0. The resulting
      activations will be of dimension [num_features * num_features] from which
      half will be zeros. Otherwise activations will be only the lower triangle
      part of the interaction matrix. The latter saves space but is much slower.
    name: String name of the layer.
  """

  def __init__(self, params, name=None, **kwargs):
    self._self_interaction = params.get_or_default('self_interaction', False)
    self._skip_gather = params.get_or_default('skip_gather', False)
    super(DotInteraction, self).__init__(name=name, **kwargs)

  def call(self, inputs, **kwargs):
    """Performs the interaction operation on the tensors in the list.

    The tensors represent transformed dense features and embedded categorical
    features.
    Pre-condition: The tensors should all have the same shape.

    Args:
      inputs: List of features with shapes [batch_size, feature_dim], or a
        single 3D tensor [batch_size, num_features, feature_dim].

    Returns:
      activations: Tensor representing interacted features. It has a dimension
      `num_features * num_features` if skip_gather is True, otherwise
      `num_features * (num_features + 1) / 2` if self_interaction is True and
      `num_features * (num_features - 1) / 2` if self_interaction is False.

    Raises:
      ValueError: if the listed input tensors cannot be stacked.
    """
    if isinstance(inputs, (list, tuple)):
      # concat_features shape: batch_size, num_features, feature_dim
      try:
        concat_features = tf.stack(inputs, axis=1)
      except (ValueError, tf.errors.InvalidArgumentError) as e:
        # The two literals are concatenated, so the separating space matters.
        raise ValueError('Input tensors` dimensions must be equal, original '
                         'error message: {}'.format(e))
    else:
      assert inputs.shape.ndims == 3, 'input of dot func must be a 3D tensor or a list of 2D tensors'
      concat_features = inputs

    batch_size = tf.shape(concat_features)[0]

    # Interact features, select lower-triangular portion, and re-shape.
    xactions = tf.matmul(concat_features, concat_features, transpose_b=True)
    num_features = xactions.shape[-1]
    ones = tf.ones_like(xactions)
    if self._self_interaction:
      # Selecting lower-triangular portion including the diagonal.
      lower_tri_mask = tf.linalg.band_part(ones, -1, 0)
      upper_tri_mask = ones - lower_tri_mask
      out_dim = num_features * (num_features + 1) // 2
    else:
      # Selecting lower-triangular portion not including the diagonal.
      upper_tri_mask = tf.linalg.band_part(ones, 0, -1)
      lower_tri_mask = ones - upper_tri_mask
      out_dim = num_features * (num_features - 1) // 2

    if self._skip_gather:
      # Setting upper triangle part of the interaction matrix to zeros.
      activations = tf.where(
          condition=tf.cast(upper_tri_mask, tf.bool),
          x=tf.zeros_like(xactions),
          y=xactions)
      out_dim = num_features * num_features
    else:
      activations = tf.boolean_mask(xactions, lower_tri_mask)
    activations = tf.reshape(activations, (batch_size, out_dim))
    return activations
Wang et al.](https://arxiv.org/pdf/1708.05123.pdf) + + Example: + + ```python + # after embedding layer in a functional model: + input = tf.keras.Input(shape=(None,), name='index', dtype=tf.int64) + x0 = tf.keras.layers.Embedding(input_dim=32, output_dim=6) + x1 = Cross()(x0, x0) + x2 = Cross()(x0, x1) + logits = tf.keras.layers.Dense(units=10)(x2) + model = tf.keras.Model(input, logits) + ``` + + Args: + projection_dim: project dimension to reduce the computational cost. + Default is `None` such that a full (`input_dim` by `input_dim`) matrix + W is used. If enabled, a low-rank matrix W = U*V will be used, where U + is of size `input_dim` by `projection_dim` and V is of size + `projection_dim` by `input_dim`. `projection_dim` need to be smaller + than `input_dim`/2 to improve the model efficiency. In practice, we've + observed that `projection_dim` = d/4 consistently preserved the + accuracy of a full-rank version. + diag_scale: a non-negative float used to increase the diagonal of the + kernel W by `diag_scale`, that is, W + diag_scale * I, where I is an + identity matrix. + use_bias: whether to add a bias term for this layer. If set to False, + no bias term will be used. + preactivation: Activation applied to output matrix of the layer, before + multiplication with the input. Can be used to control the scale of the + layer's outputs and improve stability. + kernel_initializer: Initializer to use on the kernel matrix. + bias_initializer: Initializer to use on the bias vector. + kernel_regularizer: Regularizer to use on the kernel matrix. + bias_regularizer: Regularizer to use on bias vector. + + Input shape: A tuple of 2 (batch_size, `input_dim`) dimensional inputs. + Output shape: A single (batch_size, `input_dim`) dimensional output. 
+ """ + + def __init__(self, params, **kwargs): + super(Cross, self).__init__(**kwargs) + self._projection_dim = params.get_or_default('projection_dim', None) + self._diag_scale = params.get_or_default('diag_scale', 0.0) + self._use_bias = params.get_or_default('use_bias', True) + preactivation = params.get_or_default('preactivation', None) + preact = get_activation(preactivation) + self._preactivation = tf.keras.activations.get(preact) + kernel_initializer = params.get_or_default('kernel_initializer', + 'truncated_normal') + self._kernel_initializer = tf.keras.initializers.get(kernel_initializer) + bias_initializer = params.get_or_default('bias_initializer', 'zeros') + self._bias_initializer = tf.keras.initializers.get(bias_initializer) + kernel_regularizer = params.get_or_default('kernel_regularizer', None) + self._kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer) + bias_regularizer = params.get_or_default('bias_regularizer', None) + self._bias_regularizer = tf.keras.regularizers.get(bias_regularizer) + self._input_dim = None + self._supports_masking = True + + if self._diag_scale < 0: # pytype: disable=unsupported-operands + raise ValueError( + '`diag_scale` should be non-negative. 
Got `diag_scale` = {}'.format( + self._diag_scale)) + + def build(self, input_shape): + last_dim = input_shape[0][-1] + + if self._projection_dim is None: + self._dense = tf.keras.layers.Dense( + last_dim, + kernel_initializer=_clone_initializer(self._kernel_initializer), + bias_initializer=self._bias_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + use_bias=self._use_bias, + dtype=self.dtype, + activation=self._preactivation, + ) + else: + self._dense_u = tf.keras.layers.Dense( + self._projection_dim, + kernel_initializer=_clone_initializer(self._kernel_initializer), + kernel_regularizer=self._kernel_regularizer, + use_bias=False, + dtype=self.dtype, + ) + self._dense_v = tf.keras.layers.Dense( + last_dim, + kernel_initializer=_clone_initializer(self._kernel_initializer), + bias_initializer=self._bias_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + use_bias=self._use_bias, + dtype=self.dtype, + activation=self._preactivation, + ) + self.built = True + + def call(self, inputs, **kwargs): + """Computes the feature cross. + + Args: + inputs: The input tensor(x0, x) + - x0: The input tensor + - x: Optional second input tensor. If provided, the layer will compute + crosses between x0 and x; if not provided, the layer will compute + crosses between x0 and itself. + + Returns: + Tensor of crosses. + """ + if isinstance(inputs, (list, tuple)): + x0, x = inputs + else: + x0, x = inputs, inputs + + if not self.built: + self.build(x0.shape) + + if x0.shape[-1] != x.shape[-1]: + raise ValueError( + '`x0` and `x` dimension mismatch! Got `x0` dimension {}, and x ' + 'dimension {}. 
class MaskBlock(tf.keras.layers.Layer):
  """MaskBlock used in MaskNet.

  An instance-guided mask is computed from `mask_input` by an aggregation
  layer followed by a projection to the width of `net`. It is multiplied
  element-wise with `net`, and the product goes through a bias-free dense
  layer, layer normalization and ReLU.

  Args:
    projection_dim: project dimension to reduce the computational cost.
      Default is `None` such that a full (`input_dim` by `aggregation_size`) matrix
      W is used. If enabled, a low-rank matrix W = U*V will be used, where U
      is of size `input_dim` by `projection_dim` and V is of size
      `projection_dim` by `aggregation_size`. `projection_dim` need to be smaller
      than `aggregation_size`/2 to improve the model efficiency. In practice, we've
      observed that `projection_dim` = d/4 consistently preserved the
      accuracy of a full-rank version.
  """

  def __init__(self, params, name='mask_block', reuse=None, **kwargs):
    # `Layer.__init__` takes `trainable` first; pass the name by keyword so
    # that `self.name` (used for all variable names below) is the given one.
    super(MaskBlock, self).__init__(name=name, **kwargs)
    self.config = params.get_pb_config()
    self.l2_reg = params.l2_regularizer
    self._projection_dim = params.get_or_default('projection_dim', None)
    self.reuse = reuse

  def call(self, inputs, **kwargs):
    """Apply the mask block.

    Args:
      inputs: a pair `(net, mask_input)`; the mask is derived from
        `mask_input` and applied to `net`.

    Returns:
      ReLU of the layer-normalized hidden output, last dim `output_size`.

    Raises:
      ValueError: if neither `reduction_factor` nor `aggregation_size` is set.
    """
    net, mask_input = inputs
    mask_input_dim = int(mask_input.shape[-1])
    if self.config.HasField('reduction_factor'):
      aggregation_size = int(mask_input_dim * self.config.reduction_factor)
    elif self.config.HasField('aggregation_size'):
      # `HasField` returns a bool; comparing it with `None` was always true
      # and made the error branch below unreachable.
      aggregation_size = self.config.aggregation_size
    else:
      raise ValueError(
          'Need one of reduction factor or aggregation size for MaskBlock.')

    if self.config.input_layer_norm:
      # The scope is derived from the input tensor name, so blocks fed with
      # the same tensor share one layer norm through AUTO_REUSE.
      input_name = net.name.replace(':', '_')
      net = layer_norm(net, reuse=tf.AUTO_REUSE, name='ln_' + input_name)

    initializer = tf.glorot_uniform_initializer()

    if self._projection_dim is None:
      mask = tf.layers.dense(
          mask_input,
          aggregation_size,
          activation=tf.nn.relu,
          kernel_initializer=initializer,
          kernel_regularizer=self.l2_reg,
          name='%s/hidden' % self.name,
          reuse=self.reuse)
    else:
      # Low-rank aggregation: W = U * V.
      u = tf.layers.dense(
          mask_input,
          self._projection_dim,
          kernel_initializer=initializer,
          kernel_regularizer=self.l2_reg,
          use_bias=False,
          name='%s/prj_u' % self.name,
          reuse=self.reuse)
      mask = tf.layers.dense(
          u,
          aggregation_size,
          activation=tf.nn.relu,
          kernel_initializer=initializer,
          kernel_regularizer=self.l2_reg,
          name='%s/prj_v' % self.name,
          reuse=self.reuse)
    mask = tf.layers.dense(
        mask, net.shape[-1], name='%s/mask' % self.name, reuse=self.reuse)
    masked_net = net * mask

    output_size = self.config.output_size
    hidden = tf.layers.dense(
        masked_net,
        output_size,
        use_bias=False,
        name='%s/output' % self.name,
        reuse=self.reuse)
    ln_hidden = layer_norm(
        hidden, name='%s/ln_output' % self.name, reuse=self.reuse)
    return tf.nn.relu(ln_hidden)


class MaskNet(tf.keras.layers.Layer):
  """MaskNet: Introducing Feature-Wise Multiplication to CTR Ranking Models by Instance-Guided Mask.

  With `use_parallel` every mask block sees the raw inputs and the block
  outputs are concatenated; otherwise the blocks are chained, each one masked
  by the raw inputs. An optional MLP is applied on top.

  Refer: https://arxiv.org/pdf/2102.07619.pdf
  """

  def __init__(self, params, name='mask_net', **kwargs):
    # Name must go by keyword, see `tf.keras.layers.Layer.__init__`.
    super(MaskNet, self).__init__(name=name, **kwargs)
    self.params = params
    self.config = params.get_pb_config()
    if self.config.HasField('mlp'):
      p = Parameter.make_from_pb(self.config.mlp)
      p.l2_regularizer = params.l2_regularizer
      self.mlp = MLP(p, name='%s/mlp' % name)
    else:
      self.mlp = None

  def _make_block(self, index, block_conf):
    """Build the `index`-th MaskBlock from its pb config."""
    params = Parameter.make_from_pb(block_conf)
    params.l2_regularizer = self.params.l2_regularizer
    return MaskBlock(params, name='%s/block_%d' % (self.name, index))

  def call(self, inputs, training=None, **kwargs):
    """Run the configured mask blocks and the optional MLP.

    Args:
      inputs: tensor used both as block input and as mask input.
      training: forwarded to the MLP.

    Returns:
      The MLP output, or the raw block output when no MLP is configured.
    """
    if self.config.use_parallel:
      mask_outputs = [
          self._make_block(i, block_conf)((inputs, inputs))
          for i, block_conf in enumerate(self.config.mask_blocks)
      ]
      output = tf.concat(mask_outputs, axis=1)
    else:
      output = inputs
      for i, block_conf in enumerate(self.config.mask_blocks):
        output = self._make_block(i, block_conf)((output, inputs))

    if self.mlp is not None:
      # Forward the training flag to the MLP, as FiBiNet does.
      output = self.mlp(output, training=training)
    return output
class NLinear(object):
  """One independent linear layer per token (feature) embedding.

  `tf.layers.dense` applied to a ``(batch_size, n_tokens, d_embedding)`` input
  uses the same weights for every token. `NLinear` instead owns ``n_tokens``
  separate weight matrices (and biases), and the i-th one is applied to the
  i-th token embedding, as in the following pseudocode::

      layers = [Dense(d_in, d_out) for _ in range(n_tokens)]
      x = tf.random.normal([batch_size, n_tokens, d_in])
      result = tf.stack([layers[i](x[:, i]) for i in range(n_tokens)], 1)

  Examples:
    .. testcode::

      batch_size = 2
      n_features = 3
      d_embedding_in = 4
      d_embedding_out = 5
      x = tf.random.normal([batch_size, n_features, d_embedding_in])
      m = NLinear(n_features, d_embedding_in, d_embedding_out)
      assert m(x).shape == (batch_size, n_features, d_embedding_out)
  """

  def __init__(self, n_tokens, d_in, d_out, bias=True, scope='nd_linear'):
    """Create the per-token weights (and biases) under `scope`.

    Args:
      n_tokens: the number of tokens (features)
      d_in: the input dimension
      d_out: the output dimension
      bias: indicates if the underlying linear layers have biases
      scope: variable scope name
    """
    with tf.variable_scope(scope):
      self.weight = tf.get_variable(
          'weights', [1, n_tokens, d_in, d_out], dtype=tf.float32)
      self.bias = None
      if bias:
        self.bias = tf.get_variable(
            'bias', [1, n_tokens, d_out],
            dtype=tf.float32,
            initializer=tf.constant_initializer(0.0))

  def __call__(self, x, *args, **kwargs):
    """Project a 3D input ``[B, N, D]`` to ``[B, N, D_out]``."""
    if x.shape.ndims != 3:
      raise ValueError(
          'The input must have three dimensions (batch_size, n_tokens, d_embedding)'
      )
    input_dim, weight_dim = x.shape[2], self.weight.shape[2]
    if input_dim != weight_dim:
      raise ValueError('invalid input embedding dimension %d, expect %d' %
                       (int(input_dim), int(weight_dim)))

    # Broadcast [B, N, D, 1] against [1, N, D, D_out], then contract over D.
    weighted = tf.expand_dims(x, axis=-1) * self.weight  # [B, N, D, D_out]
    projected = tf.reduce_sum(weighted, axis=-2)  # [B, N, D_out]
    if self.bias is None:
      return projected
    return projected + self.bias
class AutoDisEmbedding(tf.keras.layers.Layer):
  """An Embedding Learning Framework for Numerical Features in CTR Prediction.

  Every numerical feature owns `num_bins` meta embeddings. The scalar value is
  mapped to a softmax distribution over the bins, and the feature embedding is
  the corresponding weighted sum of its meta embeddings.

  Refer: https://arxiv.org/pdf/2012.08986v2.pdf

  Attributes:
    emb_dim: size of each feature embedding.
    num_bins: number of meta embeddings per feature.
    temperature: softmax temperature applied to the bin scores.
    keep_prob: factor of the skip connection added to the bin scores.
    output_tensor_list: also return the list of per-feature embeddings.
    output_3d_tensor: also return the 3D embedding tensor.
  """

  def __init__(self, params, name='auto_dis_embedding', **kwargs):
    # `Layer.__init__` takes `trainable` first; the name has to be passed by
    # keyword, otherwise `self.name` (the variable scope below) ignores it.
    super(AutoDisEmbedding, self).__init__(name=name, **kwargs)
    params.check_required(['embedding_dim', 'num_bins', 'temperature'])
    self.emb_dim = int(params.embedding_dim)
    self.num_bins = int(params.num_bins)
    self.temperature = params.temperature
    self.keep_prob = params.get_or_default('keep_prob', 0.8)
    self.output_tensor_list = params.get_or_default('output_tensor_list', False)
    self.output_3d_tensor = params.get_or_default('output_3d_tensor', False)

  def call(self, inputs, **kwargs):
    """Embed a 2D tensor ``[B, N]`` of numerical features.

    Returns:
      The flattened embeddings ``[B, N * D]``. When `output_tensor_list` is set
      a pair (flattened, list of N tensors ``[B, D]``) is returned, else when
      `output_3d_tensor` is set a pair (flattened, ``[B, N, D]`` tensor).

    Raises:
      ValueError: if `inputs` is not 2-dimensional.
    """
    if inputs.shape.ndims != 2:
      raise ValueError('inputs of AutoDisEmbedding must have 2 dimensions.')

    num_features = int(inputs.shape[-1])
    with tf.variable_scope(self.name):
      meta_emb = tf.get_variable(
          'meta_embedding',
          shape=[1, num_features, self.num_bins, self.emb_dim])
      w = tf.get_variable('project_w', shape=[1, num_features, self.num_bins])
      mat = tf.get_variable(
          'project_mat', shape=[1, num_features, self.num_bins, self.num_bins])

    x = tf.expand_dims(inputs, axis=-1)  # [B, N, 1]
    hidden = tf.nn.leaky_relu(w * x)  # [B, N, num_bin]

    y = tf.matmul(mat, hidden[..., None])  # [B, N, num_bin, 1]
    y = tf.squeeze(y, axis=3)  # [B, N, num_bin]

    # `keep_prob` is not a dropout rate here: it scales the skip connection
    # from the hidden activation to the projected bin scores.
    alpha = self.keep_prob
    x_bar = y + alpha * hidden  # [B, N, num_bin]
    x_hat = tf.nn.softmax(x_bar / self.temperature)  # [B, N, num_bin]

    emb = tf.matmul(x_hat[:, :, None, :], meta_emb)  # [B, N, 1, D]
    emb = tf.squeeze(emb, axis=2)  # [B, N, D]
    output = tf.reshape(emb, [-1, self.emb_dim * num_features])  # [B, N*D]

    if self.output_tensor_list:
      return output, tf.unstack(emb, axis=1)

    if self.output_3d_tensor:
      return output, emb
    return output
class SequenceEncoder(object):
  """Encodes the sequence features of a feature group.

  For every encoder listed in the group's `sequence_encoders` (`bst` or `din`)
  the group's sequence features are encoded against the target feature; the
  encodings are concatenated on the last axis.
  """

  def __init__(self, input_layer, feature_configs, feature_groups_config,
               l2_reg):
    """Index the configs and validate groups that force shared embeddings.

    Args:
      input_layer: callable `(features, group_name, is_combine=False)` that
        returns `(seq_features, target_feature, target_features)`.
      feature_configs: iterable of feature configs, keyed by `feature_name`
        or, when unset, by the first input name.
      feature_groups_config: iterable of feature group configs.
      l2_reg: regularizer passed to the encoders.

    Raises:
      ValueError: if a group with `force_share_embeddings` fails
        `check_share_embedding`.
    """
    self._input_layer = input_layer
    self._feature_groups_config = {
        x.group_name: x for x in feature_groups_config
    }
    self._l2_reg = l2_reg
    self._feature_config_by_name = {
        x.feature_name if x.HasField('feature_name') else x.input_names[0]: x
        for x in feature_configs
    }

    for name, group in self._feature_groups_config.items():
      force_share = any(
          encoder.force_share_embeddings for encoder in group.sequence_encoders)
      if not force_share:
        continue
      if not self.check_share_embedding(group):
        raise ValueError(
            'sequence feature group `%s` check share embedding failed, '
            'you should add `embedding_name` to feature config' % name)

  def check_share_embedding(self, feature_group):
    """Return True if sequence and target features share embedding names.

    Every feature of the group must set `embedding_name`, and the set of names
    used by sequence features must equal the set used by the other features.
    """
    seq_emb_names = set()
    target_emb_names = set()
    for feature in feature_group.feature_names:
      conf = self._feature_config_by_name[feature]
      if not conf.HasField('embedding_name'):
        return False
      if conf.feature_type == FeatureConfig.FeatureType.SequenceFeature:
        seq_emb_names.add(conf.embedding_name)
      else:
        target_emb_names.add(conf.embedding_name)

    if seq_emb_names != target_emb_names:
      # Sorted so that the message is stable across runs.
      tf.logging.error(
          'sequence share embedding names: %s, target share embedding names: %s'
          % (','.join(sorted(seq_emb_names)), ','.join(
              sorted(target_emb_names))))
      return False
    return True

  def __call__(self, features, group_name, is_training=True, *args, **kwargs):
    """Encode the sequence features of `group_name`.

    Returns:
      None if the group has no sequence encoder, otherwise the single encoding
      or the concatenation of all encodings on the last axis.

    Raises:
      ValueError: if the group has no sequence feature or an encoder type is
        not supported.
    """
    group_config = self._feature_groups_config[group_name]
    if len(group_config.sequence_encoders) == 0:
      return None

    seq_features, target_feature, _ = self._input_layer(
        features, group_name, is_combine=False)
    # Raised explicitly: an `assert` would be stripped under `python -O`.
    if len(seq_features) == 0:
      raise ValueError('sequence feature is empty in group: ' + group_name)

    outputs = []
    for encoder in group_config.sequence_encoders:
      encoder_type = encoder.WhichOneof('encoder').lower()
      if encoder_type == 'bst':
        bst = BST(encoder.bst, self._l2_reg, name=group_name)
        encoding = bst([seq_features, target_feature], is_training, **kwargs)
        outputs.append(encoding)
      elif encoder_type == 'din':
        din = DIN(encoder.din, self._l2_reg, name=group_name)
        encoding = din([seq_features, target_feature], is_training)
        outputs.append(encoding)
      else:
        raise ValueError('unsupported sequence encode type: ' + encoder_type)

    if len(outputs) == 0:
      logging.warning(
          "there's no sequence encoder configured in feature group: " +
          group_name)
      return None
    if len(outputs) == 1:
      return outputs[0]

    return tf.concat(outputs, axis=-1)
class Parameter(object):
  """Uniform accessor over a layer config.

  The wrapped `params` is either a mapping-like struct (`is_struct=True`,
  e.g. a protobuf `Struct`) accessed by key, or a protobuf message
  (`is_struct=False`) accessed by attribute. An optional l2 regularizer is
  carried along and inherited by nested struct parameters.
  """

  def __init__(self, params, is_struct, l2_reg=None):
    self.params = params
    self.is_struct = is_struct
    self._l2_reg = l2_reg

  @staticmethod
  def make_from_pb(config):
    """Wrap a protobuf message config."""
    return Parameter(config, False)

  def get_pb_config(self):
    """Return the wrapped protobuf message; not valid for struct params."""
    assert not self.is_struct, 'Struct parameter can not convert to pb config'
    return self.params

  @property
  def l2_regularizer(self):
    return self._l2_reg

  @l2_regularizer.setter
  def l2_regularizer(self, value):
    self._l2_reg = value

  def __getattr__(self, key):
    """Resolve unknown attributes against the wrapped config.

    Only called when normal attribute lookup fails. Own attributes and dunder
    names are never forwarded: on an instance whose `__dict__` is not populated
    yet (e.g. during `copy.copy`) forwarding them would recurse endlessly, and
    dunder protocol probes must not be answered by the wrapped object.
    """
    if key.startswith('__') or key in ('params', 'is_struct', '_l2_reg'):
      raise AttributeError(key)
    if self.is_struct:
      value = self.params[key]
      if isinstance(value, struct_pb2.Struct):
        return Parameter(value, True, self._l2_reg)
      return value
    return getattr(self.params, key)

  def __getitem__(self, key):
    return self.__getattr__(key)

  def get_or_default(self, key, def_val):
    """Return the configured value of `key`, or `def_val` if it is not set.

    For struct params a float value is cast to the type of a non-None
    `def_val`. For pb messages a value with a length (repeated field, string)
    counts as set when it is non-empty, any other field when `HasField(key)`
    is true.
    """
    if self.is_struct:
      if key not in self.params:
        return def_val
      value = self.params[key]
      if def_val is not None and isinstance(value, float):
        return type(def_val)(value)
      return value

    # pb message
    value = getattr(self.params, key)
    if hasattr(value, '__len__'):
      if len(value) > 0:
        return value
    elif self.params.HasField(key):
      return value
    return def_val

  def check_required(self, keys):
    """Raise `KeyError` if a struct param misses one of `keys`.

    No check is performed for pb message params.
    """
    if not self.is_struct:
      return
    if not isinstance(keys, (list, tuple)):
      keys = [keys]
    for key in keys:
      if key not in self.params:
        raise KeyError('%s must be set in params' % key)

  def has_field(self, key):
    """Tell whether `key` is present (struct) or set (pb message)."""
    if self.is_struct:
      return key in self.params
    return self.params.HasField(key)
def info_nce_loss(query, positive, temperature=0.1):
  """Calculates the InfoNCE loss for self-supervised learning.

  This contrastive loss enforces the embeddings of similar (positive) samples to be close
  and those of different (negative) samples to be distant.
  A query embedding is compared with one positive key and with one or more negative keys.
  The positive keys of the other rows in the batch serve as negative keys.

  References:
    https://arxiv.org/abs/1807.03748v2
    https://arxiv.org/abs/2010.05113

  Args:
    query: 2D tensor of query embeddings.
    positive: 2D tensor of positive key embeddings, with the same last
      dimension as `query`; row i is the positive key of query i.
    temperature: the logits are divided by this value.

  Returns:
    The sparse softmax cross entropy between the similarity logits and the
    diagonal labels.

  Raises:
    ValueError: if an input is not 2D or the embedding sizes differ.
  """
  # Check input dimensionality.
  if query.shape.ndims != 2:
    raise ValueError('`query` must have 2 dimensions.')
  if positive.shape.ndims != 2:
    raise ValueError('`positive` must have 2 dimensions.')
  # Embedding vectors should have same number of components.
  if query.shape[-1] != positive.shape[-1]:
    raise ValueError(
        'Vectors of `query` and `positive` should have the same number of '
        'components.')

  # Negative keys are implicitly off-diagonal positive keys.

  # Dot product between all combinations. This is a cosine similarity only if
  # the caller passes L2-normalized embeddings; nothing is normalized here.
  logits = tf.matmul(query, positive, transpose_b=True)
  logits /= temperature

  # Positive keys are the entries on the diagonal
  batch_size = tf.shape(query)[0]
  labels = tf.range(batch_size)

  return tf.losses.sparse_softmax_cross_entropy(labels, logits)
the value of last neuron before activation. session_ids: a `Tensor` with shape [batch_size]. Session ids of each sample, used to max GAUC metric. e.g. user_id alpha: the weight to balance ranking loss and calibration loss - auto_weight: bool, whether to learn loss weight between ranking loss and calibration loss + loss_weight_strategy: str, the loss weight strategy to balancing between ce_loss and ge_loss + sample_weights: Coefficients for the loss. This must be scalar or broadcastable to + `labels` (i.e. same rank and each dimension is either 1 or the same). + same_label_loss: enable ge_loss for sample with same label in a session or not. name: the name of loss """ loss_name = name if name else 'jrc_loss' - logging.info('[{}] alpha: {}, auto_weight: {}'.format(loss_name, alpha, - auto_weight)) + logging.info('[{}] alpha: {}, loss_weight_strategy: {}'.format( + loss_name, alpha, loss_weight_strategy)) - ce_loss = tf.losses.sparse_softmax_cross_entropy(labels, logits) + ce_loss = tf.losses.sparse_softmax_cross_entropy( + labels, logits, weights=sample_weights) labels = tf.expand_dims(labels, 1) # [B, 1] labels = tf.concat([1 - labels, labels], axis=1) # [B, 2] @@ -54,13 +60,56 @@ def jrc_loss(labels, y_neg, y_pos = y[:, :, 0], y[:, :, 1] l_neg, l_pos = logits[:, :, 0], logits[:, :, 1] + if tf.is_numeric_tensor(sample_weights): + logging.info('[%s] use sample weight' % loss_name) + weights = tf.expand_dims(tf.cast(sample_weights, tf.float32), 0) + pairwise_weights = tf.tile(weights, tf.stack([batch_size, 1])) + y_pos *= pairwise_weights + y_neg *= pairwise_weights + # Compute list-wise generative loss -log p(x|y, z) - loss_pos = -tf.reduce_sum(y_pos * tf.nn.log_softmax(l_pos, axis=0), axis=0) - loss_neg = -tf.reduce_sum(y_neg * tf.nn.log_softmax(l_neg, axis=0), axis=0) - ge_loss = tf.reduce_mean((loss_pos + loss_neg) / tf.reduce_sum(mask, axis=0)) + if same_label_loss: + logging.info('[%s] enable same_label_loss' % loss_name) + loss_pos = -tf.reduce_sum(y_pos * 
tf.nn.log_softmax(l_pos, axis=0), axis=0) + loss_neg = -tf.reduce_sum(y_neg * tf.nn.log_softmax(l_neg, axis=0), axis=0) + ge_loss = tf.reduce_mean( + (loss_pos + loss_neg) / tf.reduce_sum(mask, axis=0)) + else: + logging.info('[%s] disable same_label_loss' % loss_name) + diag = tf.one_hot(tf.range(batch_size), batch_size) + l_pos = l_pos + (1 - diag) * y_pos * -1e9 + l_neg = l_neg + (1 - diag) * y_neg * -1e9 + loss_pos = -tf.linalg.diag_part(y_pos * tf.nn.log_softmax(l_pos, axis=0)) + loss_neg = -tf.linalg.diag_part(y_neg * tf.nn.log_softmax(l_neg, axis=0)) + ge_loss = tf.reduce_mean(loss_pos + loss_neg) + + tf.summary.scalar('loss/%s_ce' % loss_name, ce_loss) + tf.summary.scalar('loss/%s_ge' % loss_name, ge_loss) # The final JRC model - if auto_weight: + if loss_weight_strategy == 'fixed': + loss = alpha * ce_loss + (1 - alpha) * ge_loss + elif loss_weight_strategy == 'random_uniform': + weight = tf.random_uniform([]) + loss = weight * ce_loss + (1 - weight) * ge_loss + tf.summary.scalar('loss/%s_ce_weight' % loss_name, weight) + tf.summary.scalar('loss/%s_ge_weight' % loss_name, 1 - weight) + elif loss_weight_strategy == 'random_normal': + weights = tf.random_normal([2]) + loss_weight = tf.nn.softmax(weights) + loss = loss_weight[0] * ce_loss + loss_weight[1] * ge_loss + tf.summary.scalar('loss/%s_ce_weight' % loss_name, loss_weight[0]) + tf.summary.scalar('loss/%s_ge_weight' % loss_name, loss_weight[1]) + elif loss_weight_strategy == 'random_bernoulli': + bern = tf.distributions.Bernoulli(probs=0.5, dtype=tf.float32) + weights = bern.sample(2) + loss_weight = tf.cond( + tf.equal(tf.reduce_sum(weights), 1), lambda: weights, + lambda: tf.convert_to_tensor([0.5, 0.5])) + loss = loss_weight[0] * ce_loss + loss_weight[1] * ge_loss + tf.summary.scalar('loss/%s_ce_weight' % loss_name, loss_weight[0]) + tf.summary.scalar('loss/%s_ge_weight' % loss_name, loss_weight[1]) + elif loss_weight_strategy == 'uncertainty': uncertainty1 = tf.Variable( 0, 
name='%s_ranking_loss_weight' % loss_name, dtype=tf.float32) tf.summary.scalar('loss/%s_ranking_uncertainty' % loss_name, uncertainty1) @@ -71,5 +120,6 @@ def jrc_loss(labels, loss = tf.exp(-uncertainty1) * ce_loss + 0.5 * uncertainty1 loss += tf.exp(-uncertainty2) * ge_loss + 0.5 * uncertainty2 else: - loss = alpha * ce_loss + (1 - alpha) * ge_loss + raise ValueError('Unsupported loss weight strategy `%s` for jrc loss' % + loss_weight_strategy) return loss diff --git a/easy_rec/python/loss/nce_loss.py b/easy_rec/python/loss/nce_loss.py new file mode 100644 index 000000000..f2e406d20 --- /dev/null +++ b/easy_rec/python/loss/nce_loss.py @@ -0,0 +1,39 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. + +import tensorflow as tf + +from easy_rec.python.utils.shape_utils import get_shape_list + + +def mask_samples(batch_size): + part = tf.ones((batch_size, batch_size), bool) + diag_part = tf.linalg.diag_part(part) + diag_part = tf.fill(tf.shape(diag_part), False) + part = tf.linalg.set_diag(part, diag_part) + part_half = tf.concat([part, part], axis=1) + part_total = tf.concat([part_half, part_half], axis=0) + return part_total + + +def nce_loss(z_i, z_j, temp=1): + batch_size = get_shape_list(z_i)[0] + N = 2 * batch_size + z = tf.concat((z_i, z_j), axis=0) + sim = tf.matmul(z, tf.transpose(z)) / temp + sim_i_j = tf.matrix_diag_part( + tf.slice(sim, [batch_size, 0], [batch_size, batch_size])) + sim_j_i = tf.matrix_diag_part( + tf.slice(sim, [0, batch_size], [batch_size, batch_size])) + positive_samples = tf.reshape(tf.concat((sim_i_j, sim_j_i), axis=0), (N, 1)) + mask = mask_samples(batch_size) + negative_samples = tf.reshape(tf.boolean_mask(sim, mask), (N, -1)) + + labels = tf.zeros(N, dtype=tf.int32) + logits = tf.concat((positive_samples, negative_samples), axis=1) + + loss = tf.reduce_mean( + tf.nn.sparse_softmax_cross_entropy_with_logits( + labels=labels, logits=logits)) + + return loss diff --git 
a/easy_rec/python/model/collaborative_metric_learning.py b/easy_rec/python/model/collaborative_metric_learning.py index d785e7141..b19537239 100644 --- a/easy_rec/python/model/collaborative_metric_learning.py +++ b/easy_rec/python/model/collaborative_metric_learning.py @@ -48,21 +48,22 @@ def __init__( raise ValueError('unsupported loss type: %s' % LossType.Name(self._loss_type)) - self._highway_features = {} - self._highway_num = len(self._model_config.highway) - for _id in range(self._highway_num): - highway_cfg = self._model_config.highway[_id] - highway_feature, _ = self._input_layer(self._feature_dict, - highway_cfg.input) - self._highway_features[highway_cfg.input] = highway_feature - - self.input_features = [] - if self._model_config.HasField('input'): - input_feature, _ = self._input_layer(self._feature_dict, - self._model_config.input) - self.input_features.append(input_feature) - - self.dnn = copy_obj(self._model_config.dnn) + if not self.has_backbone: + self._highway_features = {} + self._highway_num = len(self._model_config.highway) + for _id in range(self._highway_num): + highway_cfg = self._model_config.highway[_id] + highway_feature, _ = self._input_layer(self._feature_dict, + highway_cfg.input) + self._highway_features[highway_cfg.input] = highway_feature + + self.input_features = [] + if self._model_config.HasField('input'): + input_feature, _ = self._input_layer(self._feature_dict, + self._model_config.input) + self.input_features.append(input_feature) + + self.dnn = copy_obj(self._model_config.dnn) if self._labels is not None: if self._model_config.HasField('session_id'): @@ -79,32 +80,35 @@ def __init__( self.sample_id = None def build_predict_graph(self): - for _id in range(self._highway_num): - highway_cfg = self._model_config.highway[_id] - highway_fea = tf.layers.batch_normalization( - self._highway_features[highway_cfg.input], - training=self._is_training, - trainable=True, - name='highway_%s_bn' % highway_cfg.input) - highway_fea = 
highway( - highway_fea, - highway_cfg.emb_size, - activation=gelu, - scope='highway_%s' % _id) - print('highway_fea: ', highway_fea) - self.input_features.append(highway_fea) - - feature = tf.concat(self.input_features, axis=1) - - num_dnn_layer = len(self.dnn.hidden_units) - last_hidden = self.dnn.hidden_units.pop() - dnn_net = dnn.DNN(self.dnn, self._l2_reg, 'dnn', self._is_training) - net_output = dnn_net(feature) - tower_emb = tf.layers.dense( - inputs=net_output, - units=last_hidden, - kernel_regularizer=self._l2_reg, - name='dnn/dnn_%d' % (num_dnn_layer - 1)) + if self.has_backbone: + tower_emb = self.backbone + else: + for _id in range(self._highway_num): + highway_cfg = self._model_config.highway[_id] + highway_fea = tf.layers.batch_normalization( + self._highway_features[highway_cfg.input], + training=self._is_training, + trainable=True, + name='highway_%s_bn' % highway_cfg.input) + highway_fea = highway( + highway_fea, + highway_cfg.emb_size, + activation=gelu, + scope='highway_%s' % _id) + print('highway_fea: ', highway_fea) + self.input_features.append(highway_fea) + + feature = tf.concat(self.input_features, axis=1) + + num_dnn_layer = len(self.dnn.hidden_units) + last_hidden = self.dnn.hidden_units.pop() + dnn_net = dnn.DNN(self.dnn, self._l2_reg, 'dnn', self._is_training) + net_output = dnn_net(feature) + tower_emb = tf.layers.dense( + inputs=net_output, + units=last_hidden, + kernel_regularizer=self._l2_reg, + name='dnn/dnn_%d' % (num_dnn_layer - 1)) if self._model_config.output_l2_normalized_emb: norm_emb = tf.nn.l2_normalize(tower_emb, axis=-1) diff --git a/easy_rec/python/model/dbmtl.py b/easy_rec/python/model/dbmtl.py index 913793474..e87ee9ae7 100644 --- a/easy_rec/python/model/dbmtl.py +++ b/easy_rec/python/model/dbmtl.py @@ -37,24 +37,27 @@ def __init__(self, features, self._model_config.bottom_uniter, self._input_layer) - else: - self._features, _ = self._input_layer(self._feature_dict, 'all') + elif not self.has_backbone: + self._features, 
self._feature_list = self._input_layer( + self._feature_dict, 'all') self._init_towers(self._model_config.task_towers) def build_predict_graph(self): - if self._model_config.HasField('bottom_cmbf'): - bottom_fea = self._cmbf_layer(self._is_training, l2_reg=self._l2_reg) - elif self._model_config.HasField('bottom_uniter'): - bottom_fea = self._uniter_layer(self._is_training, l2_reg=self._l2_reg) - elif self._model_config.HasField('bottom_dnn'): - bottom_dnn = dnn.DNN( - self._model_config.bottom_dnn, - self._l2_reg, - name='bottom_dnn', - is_training=self._is_training) - bottom_fea = bottom_dnn(self._features) - else: - bottom_fea = self._features + bottom_fea = self.backbone + if bottom_fea is None: + if self._model_config.HasField('bottom_cmbf'): + bottom_fea = self._cmbf_layer(self._is_training, l2_reg=self._l2_reg) + elif self._model_config.HasField('bottom_uniter'): + bottom_fea = self._uniter_layer(self._is_training, l2_reg=self._l2_reg) + elif self._model_config.HasField('bottom_dnn'): + bottom_dnn = dnn.DNN( + self._model_config.bottom_dnn, + self._l2_reg, + name='bottom_dnn', + is_training=self._is_training) + bottom_fea = bottom_dnn(self._features) + else: + bottom_fea = self._features # MMOE block if self._model_config.HasField('expert_dnn'): diff --git a/easy_rec/python/model/easy_rec_model.py b/easy_rec/python/model/easy_rec_model.py index 7416c5cc4..522d3632e 100644 --- a/easy_rec/python/model/easy_rec_model.py +++ b/easy_rec/python/model/easy_rec_model.py @@ -12,6 +12,7 @@ from easy_rec.python.compat import regularizers from easy_rec.python.layers import input_layer +from easy_rec.python.layers.backbone import Backbone from easy_rec.python.utils import constant from easy_rec.python.utils import estimator_utils from easy_rec.python.utils import restore_filter @@ -36,6 +37,7 @@ def __init__(self, self._base_model_config = model_config self._model_config = model_config self._is_training = is_training + self._is_predicting = labels is None 
self._feature_dict = features # embedding variable parameters @@ -46,7 +48,7 @@ def __init__(self, self._emb_reg = regularizers.l2_regularizer(self.embedding_regularization) self._l2_reg = regularizers.l2_regularizer(self.l2_regularization) # only used by model with wide feature groups, e.g. WideAndDeep - self._wide_output_dim = -1 + self._wide_output_dim = self.get_wide_output_dim() self._feature_configs = feature_configs self.build_input_layer(model_config, feature_configs) @@ -60,6 +62,31 @@ def __init__(self, if constant.SAMPLE_WEIGHT in features: self._sample_weight = features[constant.SAMPLE_WEIGHT] + self._backbone_output = None + if model_config.HasField('backbone'): + self._backbone = Backbone( + model_config.backbone, + features, + input_layer=self._input_layer, + l2_reg=self._l2_reg) + else: + self._backbone = None + + @property + def has_backbone(self): + return self._base_model_config.HasField('backbone') + + @property + def backbone(self): + if self._backbone_output: + return self._backbone_output + if self._backbone: + self._backbone_output = self._backbone(self._is_training) + loss_dict = self._backbone.loss_dict + self._loss_dict.update(loss_dict) + return self._backbone_output + return None + @property def embedding_regularization(self): return self._base_model_config.embedding_regularization @@ -87,6 +114,13 @@ def l2_regularization(self): l2_regularization = model_config.l2_regularization return l2_regularization + def get_wide_output_dim(self): + model_config = getattr(self._base_model_config, + self._base_model_config.WhichOneof('model')) + if hasattr(model_config, 'wide_output_dim'): + return model_config.wide_output_dim + return -1 + def build_input_layer(self, model_config, feature_configs): self._input_layer = input_layer.InputLayer( feature_configs, @@ -97,7 +131,8 @@ def build_input_layer(self, model_config, feature_configs): kernel_regularizer=self._l2_reg, variational_dropout_config=model_config.variational_dropout if 
model_config.HasField('variational_dropout') else None, - is_training=self._is_training) + is_training=self._is_training, + is_predicting=self._is_predicting) @abstractmethod def build_predict_graph(self): diff --git a/easy_rec/python/model/esmm.py b/easy_rec/python/model/esmm.py index c6eaad483..50567ae63 100644 --- a/easy_rec/python/model/esmm.py +++ b/easy_rec/python/model/esmm.py @@ -31,7 +31,9 @@ def __init__(self, self._group_num = len(self._model_config.groups) self._group_features = [] - if self._group_num > 0: + if self.has_backbone: + logging.info('use bottom backbone network') + elif self._group_num > 0: logging.info('group_num: {0}'.format(self._group_num)) for group_id in range(self._group_num): group = self._model_config.groups[group_id] @@ -173,7 +175,9 @@ def build_predict_graph(self): Returns: self._prediction_dict: Prediction result of two tasks. """ - if self._group_num > 0: + if self.has_backbone: + all_fea = self.backbone + elif self._group_num > 0: group_fea_arr = [] # Both towers share the underlying network. 
for group_id in range(self._group_num): diff --git a/easy_rec/python/model/mind.py b/easy_rec/python/model/mind.py index c414703d2..270060297 100644 --- a/easy_rec/python/model/mind.py +++ b/easy_rec/python/model/mind.py @@ -32,7 +32,7 @@ def __init__(self, 'invalid model config: %s' % self._model_config.WhichOneof('model') self._model_config = self._model_config.mind - self._hist_seq_features = self._input_layer( + self._hist_seq_features, _, _ = self._input_layer( self._feature_dict, 'hist', is_combine=False) self._user_features, _ = self._input_layer(self._feature_dict, 'user') self._item_features, _ = self._input_layer(self._feature_dict, 'item') diff --git a/easy_rec/python/model/mmoe.py b/easy_rec/python/model/mmoe.py index acf1d6d59..3cc644f6d 100644 --- a/easy_rec/python/model/mmoe.py +++ b/easy_rec/python/model/mmoe.py @@ -26,7 +26,10 @@ def __init__(self, self._model_config = self._model_config.mmoe assert isinstance(self._model_config, MMoEConfig) - self._features, _ = self._input_layer(self._feature_dict, 'all') + if self.has_backbone: + self._features = self.backbone + else: + self._features, _ = self._input_layer(self._feature_dict, 'all') self._init_towers(self._model_config.task_towers) def build_predict_graph(self): diff --git a/easy_rec/python/model/multi_task_model.py b/easy_rec/python/model/multi_task_model.py index 43e5663ce..21e8f2c55 100644 --- a/easy_rec/python/model/multi_task_model.py +++ b/easy_rec/python/model/multi_task_model.py @@ -5,6 +5,7 @@ import tensorflow as tf from easy_rec.python.builders import loss_builder +from easy_rec.python.layers.dnn import DNN from easy_rec.python.model.rank_model import RankModel from easy_rec.python.protos import tower_pb2 from easy_rec.python.protos.loss_pb2 import LossType @@ -27,6 +28,71 @@ def __init__(self, self._task_num = None self._label_name_dict = {} + def build_predict_graph(self): + if not self.has_backbone: + raise NotImplementedError( + 'method `build_predict_graph` must be implemented 
when backbone network do not exits' + ) + model = self._model_config.WhichOneof('model') + assert model == 'model_params', '`model_params` must be configured' + config = self._model_config.model_params + + self._init_towers(config.task_towers) + + backbone = self.backbone + if type(backbone) in (list, tuple): + if len(backbone) != len(config.task_towers): + raise ValueError( + 'The number of backbone outputs and task towers must be equal') + task_input_list = backbone + else: + task_input_list = [backbone] * len(config.task_towers) + + tower_features = {} + for i, task_tower_cfg in enumerate(config.task_towers): + tower_name = task_tower_cfg.tower_name + if task_tower_cfg.HasField('dnn'): + tower_dnn = DNN( + task_tower_cfg.dnn, + self._l2_reg, + name=tower_name, + is_training=self._is_training) + tower_output = tower_dnn(task_input_list[i]) + else: + tower_output = task_input_list[i] + tower_features[tower_name] = tower_output + + tower_outputs = {} + relation_features = {} + # bayes network + for task_tower_cfg in config.task_towers: + tower_name = task_tower_cfg.tower_name + if task_tower_cfg.HasField('relation_dnn'): + relation_dnn = DNN( + task_tower_cfg.relation_dnn, + self._l2_reg, + name=tower_name + '/relation_dnn', + is_training=self._is_training) + tower_inputs = [tower_features[tower_name]] + for relation_tower_name in task_tower_cfg.relation_tower_names: + tower_inputs.append(relation_features[relation_tower_name]) + relation_input = tf.concat( + tower_inputs, axis=-1, name=tower_name + '/relation_input') + relation_fea = relation_dnn(relation_input) + relation_features[tower_name] = relation_fea + else: + relation_fea = tower_features[tower_name] + + output_logits = tf.layers.dense( + relation_fea, + task_tower_cfg.num_class, + kernel_regularizer=self._l2_reg, + name=tower_name + '/output') + tower_outputs[tower_name] = output_logits + + self._add_to_prediction_dict(tower_outputs) + return self._prediction_dict + def _init_towers(self, 
task_tower_configs): """Init task towers.""" self._task_towers = task_tower_configs @@ -88,6 +154,17 @@ def build_metric_graph(self, eval_config): def build_loss_graph(self): """Build loss graph for multi task model.""" + strategy = self._base_model_config.loss_weight_strategy + loss_weight_arr = [1.0] * len(self._task_towers) + if strategy == self._base_model_config.Random: + num = 0 + for task_tower_cfg in self._task_towers: + losses = task_tower_cfg.losses + num += 1 if len(losses) == 0 else len(losses) + weights = tf.random_normal([num]) + loss_weight_arr = tf.nn.softmax(weights) + + offset = 0 for task_tower_cfg in self._task_towers: tower_name = task_tower_cfg.tower_name loss_weight = task_tower_cfg.weight @@ -111,8 +188,13 @@ def build_loss_graph(self): loss_weight=loss_weight, num_class=task_tower_cfg.num_class, suffix='_%s' % tower_name) + if strategy == self._base_model_config.Random: + for loss_name in loss_dict.keys(): + loss_dict[ + loss_name] = loss_dict[loss_name] * loss_weight_arr[offset] + offset += 1 else: - for loss in losses: + for i, loss in enumerate(losses): loss_param = loss.WhichOneof('loss_param') if loss_param is not None: loss_param = getattr(loss, loss_param) @@ -125,19 +207,30 @@ def build_loss_graph(self): loss_name=loss.loss_name, loss_param=loss_param) for loss_name, loss_value in loss_ops.items(): - if loss.learn_loss_weight: - uncertainty = tf.Variable( - 0, name='%s_loss_weight' % loss_name, dtype=tf.float32) - tf.summary.scalar('loss/%s_uncertainty' % loss_name, uncertainty) - if loss.loss_type in {LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS}: - loss_dict[loss_name] = 0.5 * tf.exp( - -uncertainty) * loss_value + 0.5 * uncertainty + if strategy == self._base_model_config.Fixed: + loss_dict[loss_name] = loss_value * loss.weight + elif strategy == self._base_model_config.Uncertainty: + if loss.learn_loss_weight: + uncertainty = tf.Variable( + 0, name='%s_loss_weight' % loss_name, dtype=tf.float32) + 
tf.summary.scalar('loss/%s_uncertainty' % loss_name, + uncertainty) + if loss.loss_type in { + LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS + }: + loss_dict[loss_name] = 0.5 * tf.exp( + -uncertainty) * loss_value + 0.5 * uncertainty + else: + loss_dict[loss_name] = tf.exp( + -uncertainty) * loss_value + 0.5 * uncertainty else: - loss_dict[loss_name] = tf.exp( - -uncertainty) * loss_value + 0.5 * uncertainty + loss_dict[loss_name] = loss_value * loss.weight + elif strategy == self._base_model_config.Random: + loss_dict[loss_name] = loss_value * loss_weight_arr[i + offset] else: - loss_dict[loss_name] = loss_value * loss.weight - + raise ValueError('Unsupported loss weight strategy: ' + + strategy.Name) + offset += len(losses) self._loss_dict.update(loss_dict) kd_loss_dict = loss_builder.build_kd_loss(self.kd, self._prediction_dict, diff --git a/easy_rec/python/model/ple.py b/easy_rec/python/model/ple.py index f3ad71215..e04781bcd 100644 --- a/easy_rec/python/model/ple.py +++ b/easy_rec/python/model/ple.py @@ -27,7 +27,10 @@ def __init__(self, self._layer_nums = len(self._model_config.extraction_networks) self._task_nums = len(self._model_config.task_towers) - self._features, _ = self._input_layer(self._feature_dict, 'all') + if self.has_backbone: + self._features = self.backbone + else: + self._features, _ = self._input_layer(self._feature_dict, 'all') self._init_towers(self._model_config.task_towers) def gate(self, selector_fea, vec_feas, name): diff --git a/easy_rec/python/model/rank_model.py b/easy_rec/python/model/rank_model.py index 25eff23ea..a5f447d86 100644 --- a/easy_rec/python/model/rank_model.py +++ b/easy_rec/python/model/rank_model.py @@ -29,6 +29,29 @@ def __init__(self, if self._labels is not None: self._label_name = list(self._labels.keys())[0] + def build_predict_graph(self): + if not self.has_backbone: + raise NotImplementedError( + 'method `build_predict_graph` must be implemented when backbone network do not exits' + ) + output = self.backbone + if 
int(output.shape[-1]) != self._num_class: + logging.info('add head logits layer for rank model') + output = tf.layers.dense(output, self._num_class, name='output') + # model_config = getattr(self._base_model_config, + # self._base_model_config.WhichOneof('model')) + # if hasattr(model_config, 'add_head_logits_layer') and \ + # model_config.HasField('add_head_logits_layer'): + # add_head_logits_layer = model_config.add_head_logits_layer + # else: + # add_head_logits_layer = True + # if add_head_logits_layer: + # logging.info('add head logits layer for rank model') + # output = tf.layers.dense(output, self._num_class, name='output') + + self._add_to_prediction_dict(output) + return self._prediction_dict + def _output_to_prediction_impl(self, output, loss_type, @@ -193,7 +216,12 @@ def build_loss_graph(self): loss_weight=self._sample_weight, num_class=self._num_class) else: - for loss in self._losses: + strategy = self._base_model_config.loss_weight_strategy + loss_weight = [1.0] + if strategy == self._base_model_config.Random and len(self._losses) > 1: + weights = tf.random_normal([len(self._losses)]) + loss_weight = tf.nn.softmax(weights) + for i, loss in enumerate(self._losses): loss_param = loss.WhichOneof('loss_param') if loss_param is not None: loss_param = getattr(loss, loss_param) @@ -205,18 +233,26 @@ def build_loss_graph(self): loss_name=loss.loss_name, loss_param=loss_param) for loss_name, loss_value in loss_ops.items(): - if loss.learn_loss_weight: - uncertainty = tf.Variable( - 0, name='%s_loss_weight' % loss_name, dtype=tf.float32) - tf.summary.scalar('loss/%s_uncertainty' % loss_name, uncertainty) - if loss.loss_type in {LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS}: - loss_dict[loss_name] = 0.5 * tf.exp( - -uncertainty) * loss_value + 0.5 * uncertainty + if strategy == self._base_model_config.Fixed: + loss_dict[loss_name] = loss_value * loss.weight + elif strategy == self._base_model_config.Uncertainty: + if loss.learn_loss_weight: + uncertainty = 
tf.Variable( + 0, name='%s_loss_weight' % loss_name, dtype=tf.float32) + tf.summary.scalar('loss/%s_uncertainty' % loss_name, uncertainty) + if loss.loss_type in {LossType.L2_LOSS, LossType.SIGMOID_L2_LOSS}: + loss_dict[loss_name] = 0.5 * tf.exp( + -uncertainty) * loss_value + 0.5 * uncertainty + else: + loss_dict[loss_name] = tf.exp( + -uncertainty) * loss_value + 0.5 * uncertainty else: - loss_dict[loss_name] = tf.exp( - -uncertainty) * loss_value + 0.5 * uncertainty + loss_dict[loss_name] = loss_value * loss.weight + elif strategy == self._base_model_config.Random: + loss_dict[loss_name] = loss_value * loss_weight[i] else: - loss_dict[loss_name] = loss_value * loss.weight + raise ValueError('Unsupported loss weight strategy: ' + + strategy.Name) self._loss_dict.update(loss_dict) diff --git a/easy_rec/python/model/simple_multi_task.py b/easy_rec/python/model/simple_multi_task.py index b4c0613bc..05dd7a773 100644 --- a/easy_rec/python/model/simple_multi_task.py +++ b/easy_rec/python/model/simple_multi_task.py @@ -27,7 +27,10 @@ def __init__(self, self._model_config = self._model_config.simple_multi_task assert isinstance(self._model_config, SimpleMultiTaskConfig) - self._features, _ = self._input_layer(self._feature_dict, 'all') + if self.has_backbone: + self._features = self.backbone + else: + self._features, _ = self._input_layer(self._feature_dict, 'all') self._init_towers(self._model_config.task_towers) def build_predict_graph(self): diff --git a/easy_rec/python/protos/backbone.proto b/easy_rec/python/protos/backbone.proto new file mode 100644 index 000000000..67b230c04 --- /dev/null +++ b/easy_rec/python/protos/backbone.proto @@ -0,0 +1,95 @@ +syntax = "proto2"; +package protos; + +import "easy_rec/python/protos/dnn.proto"; +import "easy_rec/python/protos/keras_layer.proto"; + +message InputLayer { + optional bool do_batch_norm = 1; + optional bool do_layer_norm = 2; + optional float dropout_rate = 3; + optional float feature_dropout_rate = 4; + optional bool 
only_output_feature_list = 5; + optional bool only_output_3d_tensor = 6; + optional bool output_2d_tensor_and_feature_list = 7; + optional bool output_seq_and_normal_feature = 8; +} + +message Lambda { + required string expression = 1; +} + +message Input { + oneof name { + string feature_group_name = 1; + string block_name = 2; + string package_name = 3; + } + optional string input_fn = 11; + optional string input_slice = 12; +} + +message RecurrentLayer { + required uint32 num_steps = 1 [default = 1]; + optional uint32 fixed_input_index = 2; + required KerasLayer keras_layer = 3; +} + +message RepeatLayer { + required uint32 num_repeat = 1 [default = 1]; + // default output the list of multiple outputs + optional int32 output_concat_axis = 2; + required KerasLayer keras_layer = 3; +} + +message Layer { + oneof layer { + Lambda lambda = 1; + KerasLayer keras_layer = 2; + RecurrentLayer recurrent = 3; + RepeatLayer repeat = 4; + InputLayer input_layer = 5; + } +} + +message Block { + required string name = 1; + // the input names of feature groups or other blocks + repeated Input inputs = 2; + optional int32 input_concat_axis = 3 [default = -1]; + optional bool merge_inputs_into_list = 4; + optional string extra_input_fn = 5; + + // sequential layers + repeated Layer layers = 6; + + // only take effect when there are no layers + oneof layer { + InputLayer input_layer = 101; + Lambda lambda = 102; + KerasLayer keras_layer = 103; + RecurrentLayer recurrent = 104; + RepeatLayer repeat = 105; + } +} + +// a package of blocks for reuse; e.g. 
call in a contrastive learning manner +message BlockPackage { + // package name + required string name = 1; + // a few blocks generating a DAG + repeated Block blocks = 2; + // the names of output blocks + repeated string concat_blocks = 3; +} + +message BackboneTower { + // a few sub DAGs + repeated BlockPackage packages = 1; + // a few blocks generating a DAG + repeated Block blocks = 2; + // the names of output blocks + repeated string concat_blocks = 3; + // optional top mlp layer + optional MLP top_mlp = 4; +} diff --git a/easy_rec/python/protos/cmbf.proto b/easy_rec/python/protos/cmbf.proto index 598bf1ecf..34e082115 100644 --- a/easy_rec/python/protos/cmbf.proto +++ b/easy_rec/python/protos/cmbf.proto @@ -1,9 +1,50 @@ syntax = "proto2"; package protos; -import "easy_rec/python/protos/layer.proto"; import "easy_rec/python/protos/dnn.proto"; +message CMBFTower { + // The number of heads of cross modal fusion layer + required uint32 multi_head_num = 1 [default = 1]; + // The number of heads of image feature learning layer + required uint32 image_multi_head_num = 101 [default = 1]; + // The number of heads of text feature learning layer + required uint32 text_multi_head_num = 102 [default = 1]; + // The dimension of text heads + required uint32 text_head_size = 2; + // The dimension of image heads + required uint32 image_head_size = 3 [default = 64]; + // The number of patches of image feature, take effect when there is only one image feature + required uint32 image_feature_patch_num = 4 [default = 1]; + // Do dimension reduce to this size for image feature before single modal learning module + required uint32 image_feature_dim = 5 [default = 0]; + // The number of self attention layers for image features + required uint32 image_self_attention_layer_num = 6 [default = 0]; + // The number of self attention layers for text features + required uint32 text_self_attention_layer_num = 7 [default = 1]; + // The number of cross modal layers + required uint32 
cross_modal_layer_num = 8 [default = 1]; + // The dimension of image cross modal heads + required uint32 image_cross_head_size = 9; + // The dimension of text cross modal heads + required uint32 text_cross_head_size = 10; + // Dropout probability for hidden layers + required float hidden_dropout_prob = 11 [default = 0.0]; + // Dropout probability of the attention probabilities + required float attention_probs_dropout_prob = 12 [default = 0.0]; + + // Whether to add embeddings for different text sequence features + required bool use_token_type = 13 [default = false]; + // Whether to add position embeddings for the position of each token in the text sequence + required bool use_position_embeddings = 14 [default = true]; + // Maximum sequence length that might ever be used with this model + required uint32 max_position_embeddings = 15 [default = 0]; + // Dropout probability for text sequence embeddings + required float text_seq_emb_dropout_prob = 16 [default = 0.1]; + // dnn layers for other features + optional DNN other_feature_dnn = 17; +} + message CMBF { required CMBFTower config = 1; diff --git a/easy_rec/python/protos/dbmtl.proto b/easy_rec/python/protos/dbmtl.proto index 841b8adec..9adff1f62 100644 --- a/easy_rec/python/protos/dbmtl.proto +++ b/easy_rec/python/protos/dbmtl.proto @@ -3,7 +3,8 @@ package protos; import "easy_rec/python/protos/dnn.proto"; import "easy_rec/python/protos/tower.proto"; -import "easy_rec/python/protos/layer.proto"; +import "easy_rec/python/protos/cmbf.proto"; +import "easy_rec/python/protos/uniter.proto"; message DBMTL { // shared bottom cmbf layer diff --git a/easy_rec/python/protos/dnn.proto b/easy_rec/python/protos/dnn.proto index 021d34dbb..ff40f0fe4 100644 --- a/easy_rec/python/protos/dnn.proto +++ b/easy_rec/python/protos/dnn.proto @@ -12,3 +12,20 @@ message DNN { // use batch normalization optional bool use_bn = 4 [default = true]; } + +message MLP { + // hidden units for each layer + repeated uint32 hidden_units = 1; + // 
ratio of dropout + repeated float dropout_ratio = 2; + // activation function + optional string activation = 3 [default = 'relu']; + // use batch normalization + optional bool use_bn = 4 [default = true]; + optional bool use_final_bn = 5 [default = true]; + optional string final_activation = 6 [default = 'relu']; + optional bool use_bias = 7 [default = true]; + // kernel_initializer + optional string initializer = 8 [default = 'he_uniform']; + optional bool use_bn_after_activation = 9; +} diff --git a/easy_rec/python/protos/easy_rec_model.proto b/easy_rec/python/protos/easy_rec_model.proto index 27dcefadc..1e926c368 100644 --- a/easy_rec/python/protos/easy_rec_model.proto +++ b/easy_rec/python/protos/easy_rec_model.proto @@ -1,6 +1,7 @@ syntax = "proto2"; package protos; +import "easy_rec/python/protos/backbone.proto"; import "easy_rec/python/protos/fm.proto"; import "easy_rec/python/protos/deepfm.proto"; import "easy_rec/python/protos/wide_and_deep.proto"; @@ -24,9 +25,17 @@ import "easy_rec/python/protos/loss.proto"; import "easy_rec/python/protos/rocket_launching.proto"; import "easy_rec/python/protos/variational_dropout.proto"; import "easy_rec/python/protos/multi_tower_recall.proto"; +import "easy_rec/python/protos/tower.proto"; + // for input performance test message DummyModel { +} +// configure backbone network common parameters +message ModelParams { + optional float l2_regularization = 1; + optional uint32 wide_output_dim = 2; + repeated BayesTaskTower task_towers = 3; } // for knowledge distillation @@ -44,17 +53,19 @@ message KD { optional float loss_weight = 4 [default=1.0]; // only for loss_type == CROSS_ENTROPY_LOSS optional float temperature = 5 [default=1.0]; - } message EasyRecModel { required string model_class = 1; + // just a name for backbone config + optional string model_name = 99; // actually input layers, each layer produce a group of feature repeated FeatureGroupConfig feature_groups = 2; // model parameters oneof model { + ModelParams 
// Describes one layer by class name plus exactly one parameter payload.
message KerasLayer {
  // NOTE(review): presumably the name of the layer class to instantiate --
  // confirm against the code that consumes this message.
  required string class_name = 1;
  // At most one member of this oneof can be set at a time. `st_params` is a
  // free-form google.protobuf.Struct; every other member is a typed message.
  oneof params {
    google.protobuf.Struct st_params = 2;
    PeriodicEmbedding periodic_embedding = 3;
    AutoDisEmbedding auto_dis_embedding = 4;
    FM fm = 5;
    MaskBlock mask_block = 6;
    MaskNet masknet = 7;
    SENet senet = 8;
    Bilinear bilinear = 9;
    FiBiNet fibinet = 10;
    MLP mlp = 11;
    DINEncoder din = 12;
    BSTEncoder bst = 13;
    MMoELayer mmoe = 14;
  }
}
// NOTE(review): several fields below are declared `required` and also carry a
// `[default = ...]`. A required field has to be set explicitly for the message
// to be initialized, so such a default never takes effect -- confirm whether
// `optional` was intended.

// Parameters of the periodic embedding layer.
message PeriodicEmbedding {
  required uint32 embedding_dim = 1;
  required float sigma = 2;
  optional bool add_linear_layer = 3 [default = true];
  // NOTE(review): presumably only used when add_linear_layer is true -- confirm.
  optional string linear_activation = 4 [default = 'relu'];
  optional bool output_3d_tensor = 5;
  optional bool output_tensor_list = 6;
}

// Parameters of the AutoDis embedding layer.
message AutoDisEmbedding {
  required uint32 embedding_dim = 1;
  required uint32 num_bins = 2;
  required float keep_prob = 3 [default = 0.8];
  required float temperature = 4;
  optional bool output_3d_tensor = 5;
  optional bool output_tensor_list = 6;
}

// Parameters of the SENet layer.
message SENet {
  required uint32 reduction_ratio = 1 [default = 4];
  optional uint32 num_squeeze_group = 2 [default = 2];
  optional bool use_skip_connection = 3 [default = true];
  optional bool use_output_layer_norm = 4 [default = true];
}

// Parameters of the bilinear interaction layer.
message Bilinear {
  // NOTE(review): the set of accepted values is not visible here; only the
  // default 'interaction' is declared.
  required string type = 1 [default = 'interaction'];
  required bool use_plus = 2 [default = true];
  required uint32 num_output_units = 3;
}

// FiBiNet configuration: an optional Bilinear part, a required SENet part and
// an optional MLP.
message FiBiNet {
  optional Bilinear bilinear = 1;
  required SENet senet = 2;
  // Field number 8 (not 3) is what the definition declares.
  optional MLP mlp = 8;
}

// Parameters of a single mask block.
message MaskBlock {
  optional float reduction_factor = 1;
  required uint32 output_size = 2;
  optional uint32 aggregation_size = 3;
  optional bool input_layer_norm = 4 [default = true];
  optional uint32 projection_dim = 5;
}

// MaskNet configuration: a list of mask blocks plus an optional MLP.
message MaskNet {
  repeated MaskBlock mask_blocks = 1;
  // NOTE(review): presumably selects parallel vs. serial arrangement of the
  // blocks -- confirm.
  required bool use_parallel = 2 [default = true];
  optional MLP mlp = 3;
}

// Parameters of the MMoE layer.
message MMoELayer {
  // number of tasks
  required uint32 num_task = 1;
  // mmoe expert mlp layer definition
  optional MLP expert_mlp = 2;
  // number of mmoe experts
  optional uint32 num_expert = 3;
}
// Parameters of the JRC loss. The loss builder forwards alpha,
// loss_weight_strategy and same_label_loss to jrc_loss().
message JRCLoss {
  // NOTE(review): presumably the name of the input field that carries the
  // session id -- confirm against the model code.
  required string session_name = 1;
  optional float alpha = 2 [default = 0.5];
  optional bool same_label_loss = 3 [default = true];
  // Declared optional rather than required: this field is new on an existing
  // message, so configs written before it existed do not set it. As a
  // required field those configs would be uninitialized and the 'fixed'
  // default could never apply.
  optional string loss_weight_strategy = 4 [default = 'fixed'];
}
// Transformer configuration of the Uniter tower.
// NOTE(review): most fields are `required` and also declare a default; a
// required field must be set explicitly, so those defaults never apply --
// confirm whether `optional` was intended.
message UniterTower {
  // Size of the encoder layers and the pooler layer
  required uint32 hidden_size = 1;
  // Number of hidden layers in the Transformer encoder
  required uint32 num_hidden_layers = 2;
  // Number of attention heads for each attention layer in the Transformer encoder
  required uint32 num_attention_heads = 3;
  // The size of the "intermediate" (i.e. feed-forward) layer in the Transformer encoder
  required uint32 intermediate_size = 4;
  // The non-linear activation function (function or string) in the encoder and pooler.
  required string hidden_act = 5 [default = 'gelu']; // "gelu", "relu", "tanh" and "swish" are supported.
  // The dropout probability for all fully connected layers in the embeddings, encoder, and pooler
  required float hidden_dropout_prob = 6 [default = 0.1];
  // The dropout ratio for the attention probabilities
  required float attention_probs_dropout_prob = 7 [default = 0.1];
  // The maximum sequence length that this model might ever be used with
  required uint32 max_position_embeddings = 8 [default = 512];
  // Whether to add position embeddings for the position of each token in the text sequence
  required bool use_position_embeddings = 9 [default = true];
  // The stddev of the truncated_normal_initializer for initializing all weight matrices
  required float initializer_range = 10 [default = 0.02];
  // dnn layers for other features
  optional DNN other_feature_dnn = 11;
}
4560f89c6..5680cadb3 100644 --- a/easy_rec/python/test/train_eval_test.py +++ b/easy_rec/python/test/train_eval_test.py @@ -306,6 +306,11 @@ def test_bst(self): 'samples/model_config/bst_on_taobao.config', self._test_dir) self.assertTrue(self._success) + # def test_bst_contrastive_learning(self): + # self._success = test_utils.test_single_train_eval( + # 'samples/model_config/bst_cl_on_taobao.config', self._test_dir) + # self.assertTrue(self._success) + def test_dcn(self): self._success = test_utils.test_single_train_eval( 'samples/model_config/dcn_on_taobao.config', self._test_dir) @@ -955,6 +960,7 @@ def test_distribute_eval_deepfm_multi_cls(self): def test_distribute_eval_deepfm_single_cls(self): cur_eval_path = 'data/test/distribute_eval_test/dwd_distribute_eval_avazu_out_test_combo' + #cur_eval_path = '/Users/weisu.yxd/Code/EasyRec/experiments/distribute_eval_test/dwd_distribute_eval_avazu_out_test_combo' self._success = test_utils.test_distributed_eval( 'samples/model_config/deepfm_distribute_eval_combo_on_avazu_ctr.config', cur_eval_path, self._test_dir) diff --git a/easy_rec/python/tools/__init__.py b/easy_rec/python/tools/__init__.py index e69de29bb..d8300f4e3 100644 --- a/easy_rec/python/tools/__init__.py +++ b/easy_rec/python/tools/__init__.py @@ -0,0 +1 @@ +# from .explainer.explainer import create_explainer diff --git a/easy_rec/python/tools/explainer/__init__.py b/easy_rec/python/tools/explainer/__init__.py new file mode 100644 index 000000000..c1917b9fd --- /dev/null +++ b/easy_rec/python/tools/explainer/__init__.py @@ -0,0 +1 @@ +# from .methods import DeepExplain diff --git a/easy_rec/python/tools/explainer/deep_shap.py b/easy_rec/python/tools/explainer/deep_shap.py new file mode 100644 index 000000000..64508232f --- /dev/null +++ b/easy_rec/python/tools/explainer/deep_shap.py @@ -0,0 +1,766 @@ +# -*- encoding:utf-8 -*- +# Copyright (c) Alibaba, Inc. and its affiliates. 
class DeepShap(object):
  """Meant to approximate SHAP values for deep learning models.

  This is an enhanced version of the DeepLIFT algorithm (Deep SHAP) where, similar to Kernel SHAP, we
  approximate the conditional expectations of SHAP values using a selection of background samples.
  Lundberg and Lee, NIPS 2017 showed that the per node attribution rules in DeepLIFT (Shrikumar,
  Greenside, and Kundaje, arXiv 2017) can be chosen to approximate Shapley values. By integrating
  over many background samples Deep estimates approximate SHAP values such that they sum
  up to the difference between the expected model output on the passed background samples and the
  current model output (f(x) - E[f(x)]).
  """

  def __init__(self,
               inputs,
               output,
               data,
               session=None,
               learning_phase_flags=None):
    """An explainer object for a deep model using a given background dataset.

    Note that the complexity of the method scales linearly with the number of background data
    samples. Passing the entire training dataset as `data` will give very accurate expected
    values, but be unreasonably expensive. The variance of the expectation estimates scale by
    roughly 1/sqrt(N) for N background data samples. So 100 samples will give a good estimate,
    and 1000 samples a very good estimate of the expected values.

    Parameters
    ----------
    inputs : [tf.Operation]
    output : tf.Operation
        A pair of TensorFlow operations (or a list and an op) that
        specifies the input and output of the model to be explained. Note that SHAP values
        are specific to a single output value, so you get an explanation for each element of
        the output tensor (which must be a flat rank one vector).

    data : [numpy.array] or [pandas.DataFrame] or function
        The background dataset to use for integrating out features. DeepExplainer integrates
        over all these samples for each explanation. The data passed here must match the input
        operations given to the model. If a function is supplied, it must be a function that
        takes a particular input example and generates the background dataset for that example

    session : None or tensorflow.Session
        The TensorFlow session that has the model we are explaining. If None is passed then
        we do our best to find the right session, first looking for a keras session, then
        falling back to the default TensorFlow session.

    learning_phase_flags : None or list of tensors
        If you have your own custom learning phase flags pass them here. When explaining a prediction
        we need to ensure we are not in training mode, since this changes the behavior of ops like
        batch norm or dropout. If None is passed then we look for tensors in the graph that look like
        learning phase flags. Note that we assume all the flags should
        have a value of False during predictions (and hence explanations).
    """
    self.model_inputs = inputs
    self.model_output = output
    assert not isinstance(
        self.model_output,
        list), 'The model output to be explained must be a single tensor!'
    assert len(self.model_output.shape
              ) < 3, 'The model output must be a vector or a single value!'
    # a rank-1 output is a single value per sample; anything else is
    # treated as a multi-output model
    self.multi_output = len(self.model_output.shape) != 1

    # check if we have multiple inputs
    self.multi_input = isinstance(
        self.model_inputs, list) and len(self.model_inputs) != 1
    if not isinstance(self.model_inputs, list):
      self.model_inputs = [self.model_inputs]
    if not isinstance(data, list) and not callable(data):
      data = [data]
    self.data = data

    # used to track what op inputs depends on the model inputs
    self._vinputs = {}
    self.orig_grads = {}

    if session is None:
      try:
        session = tf.compat.v1.keras.backend.get_session()
      except Exception:  # older TF releases do not expose tf.compat.v1
        session = tf.keras.backend.get_session()
    self.session = tf.get_default_session() if session is None else session
    self.graph = self.session.graph

    self._init_learning_phase(learning_phase_flags)

    # save the expected output of the model
    # if self.data is a function, set self.expected_value to None
    if callable(self.data):
      self.expected_value = None
    else:
      if self.data[0].shape[0] > 5000:
        warnings.warn(
            'You have provided over 5k background samples! For better performance consider using smaller random sample.'
        )
      self.expected_value = self.run(self.model_output, self.model_inputs,
                                     self.data).mean(0)

    self._init_between_tensors(self.model_output.op, self.model_inputs)

    # make a blank array that will get lazily filled in with the SHAP value computation
    # graphs for each output. Lazy is important since if there are 1000 outputs and we
    # only explain the top 5 it would be a waste to build graphs for the other 995
    if not self.multi_output:
      self.phi_symbolics = [None]
    else:
      noutputs = self.model_output.shape.as_list()[1]
      if noutputs is not None:
        self.phi_symbolics = [None for _ in range(noutputs)]
      else:
        raise Exception(
            'The model output tensor to be explained cannot have a static shape in dim 1 of None!'
        )

  def _init_learning_phase(self, learning_phase_flags):
    """Set `learning_phase_ops` and `learning_phase_flags`.

    When no flags are given, scalar boolean `Const` ops whose name contains
    'learning_phase' are collected from `self.graph` (this catches the one
    keras uses). When flags are given they are stored as well, because `run`
    feeds `self.learning_phase_flags` on every call.
    """
    if learning_phase_flags is None:
      self.learning_phase_ops = [
          op for op in self.graph.get_operations()
          if 'learning_phase' in op.name and op.type == 'Const' and
          len(op.outputs[0].shape) == 0 and op.outputs[0].dtype == tf.bool
      ]
      self.learning_phase_flags = [
          op.outputs[0] for op in self.learning_phase_ops
      ]
    else:
      self.learning_phase_flags = list(learning_phase_flags)
      self.learning_phase_ops = [t.op for t in self.learning_phase_flags]

  def run(self, out, model_inputs, X):
    """Runs the model while also setting the learning phase flags to False."""
    feed_dict = dict(zip(model_inputs, X))
    for flag in self.learning_phase_flags:
      feed_dict[flag] = False
    return self.session.run(out, feed_dict)

  def phi_symbolic(self, i):
    """Get the SHAP value computation graph for a given model output."""
    if self.phi_symbolics[i] is None:

      def anon():
        out = self.model_output[:,
                                i] if self.multi_output else self.model_output
        return tf.gradients(out, self.model_inputs)

      self.phi_symbolics[i] = self.execute_with_overridden_gradients(anon)

    return self.phi_symbolics[i]

  def custom_grad(self, op, *grads):
    """Passes a gradient op creation request to the correct handler."""
    # we cut off the shap_ prefix before the lookup
    type_name = op.type[5:] if op.type.startswith('shap_') else op.type
    return op_handlers[type_name](self, op, *grads)

  def execute_with_overridden_gradients(self, f):
    """Call `f()` while the gradient registry points at the SHAP handlers.

    Every registry change made here is undone in a `finally` block, so the
    registry is restored even when `f` raises.
    """
    # replace the gradients for all the non-linear activations
    # we do this by hacking our way into the registry (TODO: find a public API for this if it exists)
    reg = tf_ops._gradient_registry._registry
    ops_not_in_registry = ['TensorListReserve']
    # NOTE: location_tag taken from tensorflow source for None type ops
    location_tag = ('UNKNOWN', 'UNKNOWN', 'UNKNOWN', 'UNKNOWN', 'UNKNOWN')
    has_backprop_check = hasattr(tf_gradients_impl, '_IsBackpropagatable')
    orig_is_backpropagatable = None
    added_ops = []  # placeholder entries created here
    patched_ops = []  # entries whose gradient function was swapped
    try:
      # TODO: unclear why some ops are not in the registry with TF 2.0 like TensorListReserve
      # only add what is really missing so an existing entry is never lost
      for missing in ops_not_in_registry:
        if missing not in reg:
          reg[missing] = {'type': None, 'location': location_tag}
          added_ops.append(missing)
      for n in op_handlers:
        if n in reg:
          self.orig_grads[n] = reg[n]['type']
          reg['shap_' + n] = {
              'type': self.custom_grad,
              'location': reg[n]['location']
          }
          reg[n]['type'] = self.custom_grad
          patched_ops.append(n)

      # In TensorFlow 1.10 they started pruning out nodes that they think can't be backpropped
      # unfortunately that includes the index of embedding layers so we disable that check here
      if has_backprop_check:
        orig_is_backpropagatable = tf_gradients_impl._IsBackpropagatable
        tf_gradients_impl._IsBackpropagatable = lambda tensor: True

      # define the computation graph for the attribution values using a custom gradient-like computation
      out = f()
    finally:
      # reinstate the backpropagatable check
      if orig_is_backpropagatable is not None:
        tf_gradients_impl._IsBackpropagatable = orig_is_backpropagatable
      # restore the original gradient definitions
      for n in patched_ops:
        del reg['shap_' + n]
        reg[n]['type'] = self.orig_grads[n]
      for missing in added_ops:
        del reg[missing]
    return out

  @staticmethod
  def _rank_outputs(model_output_values, output_rank_order, ranked_outputs):
    """Return, per sample, the indexes of the top `ranked_outputs` outputs.

    'max' and 'max_abs' sort in descending order (by value / by magnitude),
    'min' in ascending order, so the leading columns are always the "top".
    """
    if output_rank_order == 'max':
      ranks = np.argsort(-model_output_values)
    elif output_rank_order == 'min':
      ranks = np.argsort(model_output_values)
    elif output_rank_order == 'max_abs':
      ranks = np.argsort(-np.abs(model_output_values))
    else:
      assert False, 'output_rank_order must be max, min, or max_abs!'
    return ranks[:, :ranked_outputs]

  @staticmethod
  def _max_additivity_error(model_output, expected_value, output_phis,
                            model_output_ranks, multi_input):
    """Largest |f(x) - E[f(x)] - sum(phi)| over all samples and explained ranks.

    `model_output_ranks[:, r]` gives, per sample, the output index explained by
    `output_phis[r]`, so the check also holds when only ranked outputs were
    explained. Scalar expected values (rank-1 model output) are accepted.
    """
    model_output = np.asarray(model_output)
    model_output = model_output.reshape(model_output.shape[0], -1)
    expected = np.atleast_1d(expected_value)
    rows = np.arange(model_output.shape[0])
    worst = 0.0
    for rank, phis in enumerate(output_phis):
      out_idx = model_output_ranks[:, rank]
      diffs = model_output[rows, out_idx] - expected[out_idx]
      for phi in (phis if multi_input else [phis]):
        diffs = diffs - phi.sum(axis=tuple(range(1, phi.ndim)))
      worst = max(worst, float(np.abs(diffs).max()))
    return worst

  def shap_values(self,
                  X,
                  ranked_outputs=None,
                  output_rank_order='max',
                  check_additivity=True):
    """Return approximate SHAP values for the model applied to the data given by X.

    Parameters
    ----------
    X : list, numpy.array, or pandas.DataFrame
        A tensor (or list of tensors) of samples (where X.shape[0] == # samples) on which to
        explain the model's output.

    ranked_outputs : None or int
        If ranked_outputs is None then we explain all the outputs in a multi-output model. If
        ranked_outputs is a positive integer then we only explain that many of the top model
        outputs (where "top" is determined by output_rank_order). Note that this causes a pair
        of values to be returned (shap_values, indexes), where shap_values is a list of numpy
        arrays for each of the output ranks, and indexes is a matrix that indicates for each sample
        which output indexes were chosen as "top".

    output_rank_order : "max", "min", or "max_abs"
        How to order the model outputs when using ranked_outputs, either by maximum, minimum, or
        maximum absolute value.

    check_additivity : bool
        When True, assert that the attributions of every sample sum up to the model output
        minus the expected value. The check is skipped when the background data is a
        function, because no expected value is available then.

    Returns
    -------
    array or list
        For a models with a single output this returns a tensor of SHAP values with the same shape
        as X. For a model with multiple outputs this returns a list of SHAP value tensors, each of
        which are the same shape as X. If ranked_outputs is None then this list of tensors matches
        the number of model outputs. If ranked_outputs is a positive integer a pair is returned
        (shap_values, indexes), where shap_values is a list of tensors with a length of
        ranked_outputs, and indexes is a matrix that indicates for each sample which output indexes
        were chosen as "top".
    """
    # check if we have multiple inputs
    if not self.multi_input:
      if isinstance(X, list) and len(X) != 1:
        assert False, 'Expected a single tensor as model input!'
      elif not isinstance(X, list):
        X = [X]
    else:
      assert isinstance(X, list), 'Expected a list of model inputs!'
    assert len(self.model_inputs) == len(
        X
    ), 'Number of model inputs (%d) does not match the number given (%d)!' % (
        len(self.model_inputs), len(X))

    # rank and determine the model outputs that we will explain
    if ranked_outputs is not None and self.multi_output:
      model_output_values = self.run(self.model_output, self.model_inputs, X)
      model_output_ranks = self._rank_outputs(model_output_values,
                                              output_rank_order,
                                              ranked_outputs)
    else:
      model_output_ranks = np.tile(
          np.arange(len(self.phi_symbolics)), (X[0].shape[0], 1))

    # compute the attributions
    output_phis = []
    for i in range(model_output_ranks.shape[1]):
      phis = [np.zeros(x.shape) for x in X]
      for j in range(X[0].shape[0]):
        if callable(self.data):
          bg_data = self.data([x[j] for x in X])
          if not isinstance(bg_data, list):
            bg_data = [bg_data]
        else:
          bg_data = self.data

        # tile the inputs to line up with the background data samples
        tiled_X = [
            np.tile(X[k][j:j + 1],
                    (bg_data[k].shape[0],) + (1,) * (len(X[k].shape) - 1))
            for k in range(len(X))
        ]

        # we use the first sample for the current sample and the rest for the references
        joint_input = [
            np.concatenate([tiled_X[k], bg_data[k]], 0) for k in range(len(X))
        ]

        # run attribution computation graph
        feature_ind = model_output_ranks[j, i]
        sample_phis = self.run(
            self.phi_symbolic(feature_ind), self.model_inputs, joint_input)

        # assign the attributions to the right part of the output arrays
        for k in range(len(X)):
          phis[k][j] = (sample_phis[k][bg_data[k].shape[0]:] *
                        (X[k][j] - bg_data[k])).mean(0)

      output_phis.append(phis[0] if not self.multi_input else phis)

    # check that the SHAP values sum up to the model output
    if check_additivity and self.expected_value is not None:
      model_output = self.run(self.model_output, self.model_inputs, X)
      max_diff = self._max_additivity_error(model_output, self.expected_value,
                                            output_phis, model_output_ranks,
                                            self.multi_input)
      assert max_diff < 1e-2, "The SHAP explanations do not sum up to the model's output! This is either because of a " \
                              'rounding error or because an operator in your computation graph was not fully supported. If ' \
                              'the sum difference of %f is significant compared the scale of your model outputs please post ' \
                              'as a github issue, with a reproducible example if possible so we can debug it.' % max_diff

    if not self.multi_output:
      return output_phis[0]
    elif ranked_outputs is not None:
      return output_phis, model_output_ranks
    else:
      return output_phis

  def _init_between_tensors(self, out_op, model_inputs):
    """Record the ops, tensors and op types on the paths from inputs to output."""
    # find all the operations in the graph between our inputs and outputs
    # (don't follow learning phase branches)
    tensor_blacklist = tensors_blocked_by_false(self.learning_phase_ops)
    dependence_breakers = [
        k for k in op_handlers if op_handlers[k] == break_dependence
    ]
    back_ops = backward_walk_ops([out_op], tensor_blacklist,
                                 dependence_breakers)
    start_ops = [op for minput in model_inputs for op in minput.consumers()]
    self.between_ops = forward_walk_ops(
        start_ops, tensor_blacklist, dependence_breakers, within_ops=back_ops)

    # note all the tensors that are on the path between the inputs and the output
    self.between_tensors = {}
    for op in self.between_ops:
      for t in op.outputs:
        self.between_tensors[t.name] = True
    for t in model_inputs:
      self.between_tensors[t.name] = True

    # save what types are being used
    self.used_types = {op.type: True for op in self.between_ops}

  def _variable_inputs(self, op):
    """Return which inputs of this operation are variable (i.e. depend on the model inputs)."""
    if op not in self._vinputs:
      # builtin bool: the np.bool alias no longer exists in current NumPy
      out = np.zeros(len(op.inputs), dtype=bool)
      for i, t in enumerate(op.inputs):
        out[i] = t.name in self.between_tensors
      self._vinputs[op] = out
    return self._vinputs[op]


def tensors_blocked_by_false(ops):
  """Follows a set of ops assuming their value is False and find blocked Switch paths.

  This is used to prune away parts of the model graph that are only used during the training
  phase (like dropout, batch norm, etc.).

  The walk is iterative and visits every op once, so shared sub-graphs are not
  re-traversed and deep graphs cannot exhaust the recursion limit.
  """
  blocked = []
  seen = set()
  stack = list(ops)[::-1]  # reversed so ops are visited in the given order
  while stack:
    op = stack.pop()
    if op in seen:
      continue
    seen.add(op)
    if op.type == 'Switch':
      # the true path is blocked since we assume the ops we trace are False
      blocked.append(op.outputs[1])
    else:
      consumers = [c for out in op.outputs for c in out.consumers()]
      stack.extend(reversed(consumers))
  return blocked


def backward_walk_ops(start_ops, tensor_blacklist, op_type_blacklist):
  """Collect ops reachable from `start_ops` by following op inputs.

  Ops whose type is in `op_type_blacklist` are neither returned nor traversed;
  input tensors found in `tensor_blacklist` are not followed. Ops are returned
  in first-visit order.
  """
  found_ops = []
  seen = set()  # O(1) membership next to the ordered result list
  op_stack = list(start_ops)
  while op_stack:
    op = op_stack.pop()
    if op.type in op_type_blacklist or op in seen:
      continue
    seen.add(op)
    found_ops.append(op)
    for tensor in op.inputs:
      if tensor not in tensor_blacklist:
        op_stack.append(tensor.op)
  return found_ops


def forward_walk_ops(start_ops, tensor_blacklist, op_type_blacklist,
                     within_ops):
  """Collect ops reachable from `start_ops` by following output consumers.

  Only ops contained in `within_ops` are returned or traversed; the type and
  tensor blacklists work as in `backward_walk_ops`. Ops are returned in
  first-visit order.
  """
  found_ops = []
  seen = set()
  allowed = set(within_ops)
  op_stack = list(start_ops)
  while op_stack:
    op = op_stack.pop()
    if op.type in op_type_blacklist or op not in allowed or op in seen:
      continue
    seen.add(op)
    found_ops.append(op)
    for out in op.outputs:
      if out not in tensor_blacklist:
        op_stack.extend(out.consumers())
  return found_ops
def nonlinearity_1d_handler(input_ind, explainer, op, *grads):
  """Rescale the gradient of a nonlinear op in which a single input varies.

  The input at `input_ind` and the op output are each split in two along
  axis 0 (named x* and r* below; presumably the explained samples and their
  reference values -- confirm against the caller that stacks them).
  Where the input delta is close to zero the op's original gradient is used,
  elsewhere the incoming gradient is multiplied by delta_out / delta_in.

  Args:
    input_ind: index of the only input that is allowed to vary.
    explainer: object providing `_variable_inputs(op)` and `orig_grads`.
    op: the op being attributed; a leading 'shap_' is stripped from `op.type`.
    *grads: incoming gradients; only `grads[0]` is used.

  Returns:
    list with one entry per op input, None everywhere except at `input_ind`.
  """
  inputs = op.inputs
  if inputs is None:
    inputs = op.outputs[0].op.inputs

  # every input other than `input_ind` has to be constant
  for pos in range(len(inputs)):
    if pos == input_ind:
      continue
    assert not explainer._variable_inputs(op)[pos], (
        str(pos) + 'th input to ' + op.name + ' cannot vary!')

  x_in, ref_in = tf.split(inputs[input_ind], 2)
  x_out, ref_out = tf.split(op.outputs[input_ind], 2)
  delta_in = x_in - ref_in
  # tile multiples: repeat twice along axis 0, once along every other axis
  tile_dims = [2, 1] if delta_in.shape is None else (
      [2] + [1 for _ in delta_in.shape[1:]])

  result = [None for _ in inputs]
  if op.type.startswith('shap_'):
    op.type = op.type[5:]
  plain_grad = explainer.orig_grads[op.type](op, grads[0])

  tiny_delta = tf.tile(tf.abs(delta_in), tile_dims) < 1e-6
  fallback = plain_grad[input_ind] if len(inputs) > 1 else plain_grad
  rescaled = grads[0] * tf.tile((x_out - ref_out) / delta_in, tile_dims)
  result[input_ind] = tf.where(tiny_delta, fallback, rescaled)
  return result
def softmax(explainer, op, *grads):
  """Attribute through Softmax by decomposing it into exp / sum / divide.

  The softmax is rebuilt from primitive ops over the last axis and
  `tf.gradients` is run through that rebuilt sub-graph.

  The input is centered by its row-wise max before `tf.exp` for numerical
  stability, and the resulting gradient is rescaled afterwards by
  (delta of centered input) / (delta of raw input) to account for that shift.

  The softmax axis is assumed to be the last dimension (the original author
  notes that TF moves `axis` to the last position before the Softmax op).

  Args:
    explainer: object exposing the `between_tensors` dict (tensor name -> flag).
    op: the Softmax op; only `op.inputs[0]` is read.
    *grads: incoming gradients; only `grads[0]` is used.

  Returns:
    Tensor with the attribution gradient for `op.inputs[0]`.
  """
  in0 = op.inputs[0]
  in0_max = tf.reduce_max(in0, axis=-1, keepdims=True, name='in0_max')
  in0_centered = in0 - in0_max
  evals = tf.exp(in0_centered, name='custom_exp')
  rsum = tf.reduce_sum(evals, axis=-1, keepdims=True)
  div = evals / rsum

  # Ops created above. A distinct loop name is used so the `op` parameter is
  # not rebound while iterating.
  helper_ops = [evals.op, rsum.op, div.op, in0_centered.op]

  # mark these as in-between the inputs and outputs; False tags the entries
  # added here so that exactly those can be removed again below
  for helper_op in helper_ops:
    for t in helper_op.outputs:
      if t.name not in explainer.between_tensors:
        explainer.between_tensors[t.name] = False

  out = tf.gradients(div, in0_centered, grad_ys=grads[0])[0]

  # remove the names we just added
  for helper_op in helper_ops:
    for t in helper_op.outputs:
      if explainer.between_tensors[t.name] is False:
        del explainer.between_tensors[t.name]

  # rescale to account for our shift by in0_max (done for numerical stability)
  xin0, rin0 = tf.split(in0, 2)
  xin0_centered, rin0_centered = tf.split(in0_centered, 2)
  delta_in0 = xin0 - rin0
  dup0 = [2] + [1 for _ in delta_in0.shape[1:]]
  return tf.where(
      tf.tile(tf.abs(delta_in0), dup0) < 1e-6, out,
      out * tf.tile((xin0_centered - rin0_centered) / delta_in0, dup0))
def gather(explainer, op, *grads):
  """Attribution handler for GatherV2 / ResourceGather.

  Two cases are supported:
    * only the indices (input 1) vary: the attribution for the indices is
      sum(grad * delta_out) / delta_indices, and zero where the index delta
      is (almost) zero;
    * only the params (input 0) vary: the op is linear, so the original
      gradient is used.

  Args:
    explainer: object providing `_variable_inputs(op)` and `orig_grads`.
    op: the gather op; input 0 is params, input 1 is indices.
    *grads: incoming gradients; only `grads[0]` is used.

  Returns:
    list of gradients for the op inputs.

  Raises:
    AssertionError: if the indices tensor is not rank 2, or if the
      combination of varying inputs is not one of the two cases above.
  """
  # params = op.inputs[0]
  indices = op.inputs[1]
  # axis = op.inputs[2]
  var = explainer._variable_inputs(op)
  if var[1] and not var[0]:
    assert len(indices.shape
              ) == 2, 'Only scalar indices supported right now in GatherV2!'

    xin1, rin1 = tf.split(tf.cast(op.inputs[1], tf.float32), 2)
    xout, rout = tf.split(op.outputs[0], 2)
    dup_in1 = [2] + [1 for _ in xin1.shape[1:]]
    dup_out = [2] + [1 for _ in xout.shape[1:]]
    delta_in1_t = tf.tile(xin1 - rin1, dup_in1)
    # reduce over the axes of the gradient that lie beyond the indices' rank
    out_sum = tf.reduce_sum(
        grads[0] * tf.tile(xout - rout, dup_out),
        list(range(len(indices.shape), len(grads[0].shape))))
    # built once and shared by both return shapes below
    indices_grad = tf.where(
        tf.abs(delta_in1_t) < 1e-6, tf.zeros_like(delta_in1_t),
        out_sum / delta_in1_t)
    # Other handlers in this module strip a 'shap_' prefix from op.type, so
    # tolerate it here as well when deciding how many gradients to return.
    op_type = op.type[5:] if op.type.startswith('shap_') else op.type
    if op_type == 'ResourceGather':
      # no axis input here, hence only two gradients
      return [None, indices_grad]
    return [None, indices_grad, None]
  elif var[0] and not var[1]:
    if op.type.startswith('shap_'):
      op.type = op.type[5:]
    return [explainer.orig_grads[op.type](op, grads[0]),
            None]  # linear in this case
  else:
    # Explicit raise instead of `assert False`, so that the function cannot
    # silently return None when Python runs with -O.
    raise AssertionError('Axis not yet supported to be varying for gather op!')
def linearity_with_excluded_handler(input_inds, explainer, op, *grads):
  """Use the original gradient after checking the excluded inputs are fixed.

  Args:
    input_inds: indices of inputs that must not vary; negative values count
      from the end of the op's input list.
    explainer: object providing `_variable_inputs(op)` and `orig_grads`.
    op: the op being attributed; a leading 'shap_' is stripped from `op.type`.
    *grads: incoming gradients, forwarded unchanged to the original gradient.

  Returns:
    whatever the original gradient function registered for `op.type` returns.
  """
  num_inputs = len(op.inputs)
  for pos in range(num_inputs):
    is_excluded = pos in input_inds or (pos - num_inputs) in input_inds
    if not is_excluded:
      continue
    assert not explainer._variable_inputs(op)[pos], (
        str(pos) + 'th input to ' + op.name + ' cannot vary!')

  type_name = op.type
  if type_name.startswith('shap_'):
    op.type = type_name[5:]
  return explainer.orig_grads[op.type](op, *grads)
# Maps a TensorFlow op type name to the attribution handler used for it.
op_handlers = {
    # ops that are always linear
    'Identity': passthrough,
    'StridedSlice': passthrough,
    'Squeeze': passthrough,
    'ExpandDims': passthrough,
    'Pack': passthrough,
    'BiasAdd': passthrough,
    'Unpack': passthrough,
    'Add': passthrough,
    'Sub': passthrough,
    'Merge': passthrough,
    'Sum': passthrough,
    'Mean': passthrough,
    'Cast': passthrough,
    'Transpose': passthrough,
    'Enter': passthrough,
    'Exit': passthrough,
    'NextIteration': passthrough,
    'Tile': passthrough,
    'TensorArrayScatterV3': passthrough,
    'TensorArrayReadV3': passthrough,
    'TensorArrayWriteV3': passthrough,

    # ops that don't pass any attributions to their inputs
    'Shape': break_dependence,
    'RandomUniform': break_dependence,
    'ZerosLike': break_dependence,
    # 'StopGradient': break_dependence,  # this allows us to stop attributions
    # when we want to (like softmax re-centering)

    # ops that are linear and only allow a single input to vary
    'Reshape': linearity_1d(0),
    'Pad': linearity_1d(0),
    'ReverseV2': linearity_1d(0),
    'ConcatV2': linearity_with_excluded([-1]),
    'Conv2D': linearity_1d(0),
    'Switch': linearity_1d(0),
    'AvgPool': linearity_1d(0),
    'FusedBatchNorm': linearity_1d(0),

    # ops that are nonlinear and only allow a single input to vary
    'Relu': nonlinearity_1d(0),
    'Elu': nonlinearity_1d(0),
    'Sigmoid': nonlinearity_1d(0),
    'Tanh': nonlinearity_1d(0),
    'Softplus': nonlinearity_1d(0),
    'Exp': nonlinearity_1d(0),
    'ClipByValue': nonlinearity_1d(0),
    'Rsqrt': nonlinearity_1d(0),
    'Square': nonlinearity_1d(0),
    'Max': nonlinearity_1d(0),

    # ops that are nonlinear and allow two inputs to vary
    'SquaredDifference':
        nonlinearity_1d_nonlinearity_2d(0, 1, lambda x, y: (x - y) * (x - y)),
    'Minimum':
        nonlinearity_1d_nonlinearity_2d(0, 1, lambda x, y: tf.minimum(x, y)),
    'Maximum':
        nonlinearity_1d_nonlinearity_2d(0, 1, lambda x, y: tf.maximum(x, y)),

    # ops that allow up to two inputs to vary and are linear when only one
    # input varies
    'Mul': linearity_1d_nonlinearity_2d(0, 1, lambda x, y: x * y),
    'RealDiv': linearity_1d_nonlinearity_2d(0, 1, lambda x, y: x / y),
    'MatMul': linearity_1d_nonlinearity_2d(0, 1, lambda x, y: tf.matmul(x, y)),

    # ops that need their own custom attribution functions
    'GatherV2': gather,
    'ResourceGather': gather,
    'MaxPool': maxpool,
    'Softmax': softmax,
}
+_EXPLAINER_CLASS_MAP = {} +_register_abc_meta = get_register_class_meta( + _EXPLAINER_CLASS_MAP, have_abstract_class=True) + + +class Explainer(six.with_metaclass(_register_abc_meta, object)): + version = 1 + + def __init__(self, deep_explain, model_path, method_name): + """Base class for explainer. + + Args: + deep_explain: a deep explain context manager + model_path: saved_model directory or frozen pb file path + method_name: explain method name + """ + self.deep_explain = deep_explain + self.method = method_name + self._inputs_map = collections.OrderedDict() + self._outputs_map = collections.OrderedDict() + self._model_path = model_path + self._explainer = None + self._effective_fields = None + self._build_model() + + def _build_model(self): + model_path = self._model_path + logging.info('loading model from %s' % model_path) + if gfile.IsDirectory(model_path): + assert tf.saved_model.loader.maybe_saved_model_directory(model_path), \ + 'saved model does not exists in %s' % model_path + else: + raise ValueError('currently only savedmodel is supported, path:' + + model_path) + + input_fields = _get_input_fields_from_pipeline_config(model_path) + self._input_fields_info, self._input_fields = input_fields + + de = self.deep_explain + meta_graph_def = tf.saved_model.loader.load( + de.session, [tf.saved_model.tag_constants.SERVING], model_path) + # parse signature + signature_def = meta_graph_def.signature_def[ + signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] + inputs = signature_def.inputs + input_info = [] + self._is_multi_placeholder = len(inputs.items()) > 1 + if self._is_multi_placeholder: + for gid, item in enumerate(inputs.items()): + name, tensor = item + logging.info('Load input binding: %s -> %s' % (name, tensor.name)) + input_name = tensor.name + input_name, _ = input_name.split(':') + try: + input_id = input_name.split('_')[-1] + input_id = int(input_id) + except Exception: + # support for models that are not exported by easy_rec + # in which case, 
the order of inputs may not be the + # same as they are defined, therefore, list input + # could not be supported, only dict input could be supported + logging.warning('could not determine input_id from input_name: %s' % + input_name) + input_id = gid + input_info.append((input_id, name, tensor.dtype)) + self._inputs_map[name] = de.graph.get_tensor_by_name(tensor.name) + else: + # only one input, all features concatenate together + for name, tensor in inputs.items(): + logging.info('Load input binding: %s -> %s' % (name, tensor.name)) + input_info.append((0, name, tensor.dtype)) + self._inputs_map[name] = de.graph.get_tensor_by_name(tensor.name) + + # sort inputs by input_ids so as to match the order of csv data + input_info.sort(key=lambda t: t[0]) + self._input_names = [t[1] for t in input_info] + + outputs = signature_def.outputs + for name, tensor in outputs.items(): + logging.info('Load output binding: %s -> %s' % (name, tensor.name)) + self._outputs_map[name] = de.graph.get_tensor_by_name(tensor.name) + + # get assets + # self._assets = {} + # asset_files = tf.get_collection(constants.ASSETS_KEY) + # for any_proto in asset_files: + # asset_file = meta_graph_pb2.AssetFileDef() + # any_proto.Unpack(asset_file) + # type_name = asset_file.tensor_info.name.split(':')[0] + # asset_path = os.path.join(model_path, constants.ASSETS_DIRECTORY, + # asset_file.filename) + # assert gfile.Exists( + # asset_path), '%s is missing in saved model' % asset_path + # self._assets[type_name] = asset_path + # logging.info(self._assets) + + def default_values(self): + input_fields = self._input_fields if self._effective_fields is None else self._effective_fields + n = len(input_fields) + m = len(self._input_names) + assert m == n, 'the number input columns is not expected, %d given, %d expected\n' \ + 'model inputs: %s\ninput fields: %s' % (n, m, ','.join(self._input_names), ','.join(input_fields)) + + default_value = [] + for i, (field, name) in enumerate(zip(input_fields, 
self._input_names)): + assert field == name, 'input field `%d` has different names: <%s, %s>' % ( + i, field, name) + value = self._get_defaults(field) + # default_value.append(np.array([value])) # for deep_shap + default_value.append(np.array(value)) # for deep_shap + return default_value + + def _get_defaults(self, col_name, col_type='string'): + if col_name in self._input_fields_info: + col_type, default_val = self._input_fields_info[col_name] + default_val = get_type_defaults(col_type, default_val) + logging.info('col_name: %s, default_val: %s' % (col_name, default_val)) + else: + defaults = {'string': '', 'double': 0.0, 'bigint': 0} + assert col_type in defaults, 'invalid col_type: %s, col_type: %s' % ( + col_name, col_type) + default_val = defaults[col_type] + logging.info( + 'col_name: %s, default_val: %s.[not defined in saved_model_dir/assets/pipeline.config]' + % (col_name, default_val)) + return default_val + + def str_to_number(self, values): + assert len(values) == len( + self._input_fields + ), 'value count %d is not equal to the number of input fields %d' % ( + len(values), len(self._input_fields)) + result = [] + for i, name in enumerate(self._input_names): + assert name in self._input_fields_info, 'input `%s` not in pipeline config' % name + idx = self._input_fields.index(name) + input_type, default_val = self._input_fields_info[name] + if input_type in {DatasetConfig.INT32, DatasetConfig.INT64}: + tmp_field = int(values[idx]) + elif input_type in [DatasetConfig.FLOAT, DatasetConfig.DOUBLE]: + tmp_field = float(values[idx]) + elif input_type in [DatasetConfig.BOOL]: + tmp_field = values[idx].lower() in ['true', '1', 't', 'y', 'yes'] + elif input_type in [DatasetConfig.STRING]: + tmp_field = values[idx] + else: + assert False, 'invalid types: %s' % str(input_type) + result.append(tmp_field) + return result + + def get_explainer(self, output_cols=None): + if output_cols is None or output_cols == 'ALL_COLUMNS': + self._output_cols = 
sorted(self.output_names) + logging.info('predict output cols: %s' % self._output_cols) + else: + # specified as score float,embedding string + tmp_cols = [] + for x in output_cols.split(','): + if x.strip() == '': + continue + tmp_keys = x.split(' ') + tmp_cols.append(tmp_keys[0].strip()) + self._output_cols = tmp_cols + if len(self._output_cols) > 1: + logging.warning( + 'Only one output can be supported currently, use the first one: %s', + self._output_cols[0]) + + output_name = self._output_cols[0] + assert output_name in self.output_names, 'invalid output name `%s` not in model outputs `%s`' % ( + output_name, ','.join(self.output_names)) + if output_name is None: + output = self._outputs_map.values()[0] + elif type(output_name) in {str, unicode}: + output = self._outputs_map[output_name] + else: + raise Exception('unsupported type of output_name: ' + + str(type(output_name))) + + def_vals = self.default_values() + # print('default values (%d):' % len(def_vals), def_vals) + inputs = [self._inputs_map[name] for name in self._input_names] + # e = DeepShap(inputs, output, def_vals, session=self._session) + # self._explainer = e + e = self.deep_explain.get_explainer( + self.method, output, inputs, baseline=def_vals) + return e + + @property + def input_names(self): + """Input names of the model. + + Returns: + a list, which conaining the name of input nodes available in model + """ + return self._input_names + + @property + def output_names(self): + """Output names of the model. 
  @abc.abstractmethod
  def feature_importance(self,
                         input_path,
                         output_path,
                         reserved_cols='',
                         output_cols=None,
                         batch_size=1024,
                         slice_id=0,
                         slice_num=1):
    """Compute feature attributions for a table; implemented by subclasses.

    Args:
      input_path: path of the table to read samples from.
      output_path: path of the table the results are written to.
      reserved_cols: comma separated names of input table columns to keep.
      output_cols: model output column spec, see `get_explainer`.
      batch_size: number of records read per batch.
      slice_id: index of the table slice handled by this worker.
      slice_num: total number of slices (workers).
    """
    pass
class OdpsRtpExplainer(Explainer):
  """Explainer reading tables whose features are packed in one column.

  Each record carries a `features` column holding all raw input fields joined
  by the separator configured in `data_config.separator`.
  """

  def __init__(self, deep_explain, model_path, method_name):
    """Load the model and read separator / effective fields from the config.

    Args:
      deep_explain: a deep explain context manager
      model_path: saved_model directory
      method_name: explain method name
    """
    super(OdpsRtpExplainer, self).__init__(deep_explain, model_path,
                                           method_name)
    # Stays None when pipeline.config is missing, so feature_importance can
    # fail with a clear message instead of an AttributeError.
    self._fg_separator = None
    pipeline_path = os.path.join(model_path, 'assets/pipeline.config')
    if not gfile.Exists(pipeline_path):
      logging.warning(
          '%s not exists, default values maybe inconsistent with the values used in training.'
          % pipeline_path)
      return
    pipeline_config = get_configs_from_pipeline_file(pipeline_path)
    self._fg_separator = pipeline_config.data_config.separator

    if not pipeline_config.export_config.filter_inputs:
      return

    if len(pipeline_config.feature_configs) > 0:
      feature_configs = pipeline_config.feature_configs
    elif pipeline_config.feature_config and len(
        pipeline_config.feature_config.features) > 0:
      feature_configs = pipeline_config.feature_config.features
    else:
      # explicit raise: `assert False` would vanish under -O and leave
      # `feature_configs` unbound
      raise AssertionError(
          'One of feature_configs and feature_config.features must be configured.'
      )

    effective_fids = set()
    for fc in feature_configs:
      for input_name in fc.input_names:
        assert input_name in self._input_fields, 'invalid input_name in %s' % str(
            fc)
        effective_fids.add(self._input_fields.index(input_name))
    # Sort fids from small to large. Iterating a set gives no ordering
    # guarantee, so the ids are sorted explicitly.
    self._effective_fids = sorted(effective_fids)
    self._effective_fields = [
        self._input_fields[x] for x in self._effective_fids
    ]
    logging.info('raw input fields: %d, effective fields: %d' %
                 (len(self._input_fields), len(self._effective_fields)))

  def feature_importance(self,
                         input_path,
                         output_path,
                         reserved_cols='',
                         output_cols=None,
                         batch_size=1024,
                         slice_id=0,
                         slice_num=1):
    """Compute attributions for every record of a table and write them out.

    Args:
      input_path: table to read; must contain a `features` column.
      output_path: table the reserved columns and attributions are written to.
      reserved_cols: comma separated names of input columns copied to output.
      output_cols: model output column spec, see `get_explainer`.
      batch_size: number of records read per batch.
      slice_id: index of the table slice handled by this worker.
      slice_num: total number of slices (workers).

    Raises:
      ValueError: if the feature separator is unknown because
        assets/pipeline.config was not found when the explainer was built.
    """
    if self._fg_separator is None:
      raise ValueError(
          'feature separator is unknown, assets/pipeline.config not found in %s'
          % self._model_path)

    input_cols = [x.strip() for x in (reserved_cols or '').split(',') if x.strip()]
    reserved_dim = len(input_cols)
    if 'features' not in input_cols:
      input_cols.append('features')
    # `features` is not necessarily last when it is also a reserved column
    feature_idx = input_cols.index('features')
    selected_cols = ','.join(input_cols)
    print('selected_cols: ' + selected_cols)

    explainer = self.get_explainer(output_cols)
    print('reference value:', explainer.expected_value)

    import common_io
    reader = common_io.table.TableReader(
        input_path,
        selected_cols=selected_cols,
        slice_id=slice_id,
        slice_count=slice_num)
    writer = None
    sum_t0, sum_t1, sum_t2 = 0, 0, 0
    try:
      writer = common_io.table.TableWriter(output_path, slice_id=slice_id)
      total_records_num = reader.get_row_count()
      for batch_id, offset in enumerate(
          moves.range(0, total_records_num, batch_size)):
        t0 = time.time()
        records = reader.read(batch_size, allow_smaller_final_batch=True)
        t1 = time.time()
        inputs = []
        reserved = []
        for record in records:
          if reserved_dim > 0:
            reserved.append(record[:reserved_dim])
          feature_str = record[feature_idx].decode('utf-8')
          inputs.append(
              self.str_to_number(feature_str.split(self._fg_separator)))
        inputs = list(np.array(inputs).T)
        logging.debug('inputs: %s', inputs)
        ret = np.array(explainer.run(inputs, batch_size=len(records)))
        # NOTE(review): the layout of `ret` is not visible here. Concatenating
        # on axis=1 presumes one row per record, yet the result is transposed
        # before being written -- confirm against explainer.run and
        # TableWriter.write.
        if reserved_dim > 0:
          outputs = np.concatenate([np.array(reserved), ret], axis=1)
        else:
          outputs = ret
        indices = list(range(outputs.shape[1]))
        t2 = time.time()
        writer.write(outputs.T, indices, allow_type_cast=True)
        t3 = time.time()
        sum_t0 += (t1 - t0)
        sum_t1 += (t2 - t1)
        sum_t2 += (t3 - t2)
        # `offset` counts samples, so progress is reported from the batch
        # counter and the number of records actually read
        if batch_id % 2 == 0:
          logging.info('progress: batch_num=%d sample_num=%d' %
                       (batch_id + 1, offset + len(records)))
          logging.info('time_stats: read: %.2f predict: %.2f write: %.2f' %
                       (sum_t0, sum_t1, sum_t2))
      logging.info('Final_time_stats: read: %.2f predict: %.2f write: %.2f' %
                   (sum_t0, sum_t1, sum_t2))
    finally:
      # release the table handles even when a batch fails
      if writer is not None:
        writer.close()
      reader.close()
    logging.info('Explain %s done.' % input_path)
def run(FLAGS):
  """Locate the saved model and run the deeplift explainer over the tables.

  Args:
    FLAGS: parsed flags providing saved_model_dir, worker_hosts,
      explain_tables, tables, outputs, reserved_cols, output_cols,
      batch_size and task_index.

  Raises:
    ValueError: if saved_model_dir is not a directory, or if the located
      model has no assets/pipeline.config.
  """
  saved_model_dir = FLAGS.saved_model_dir
  if not gfile.IsDirectory(saved_model_dir):
    raise ValueError('model_path should be a directory, path:' +
                     saved_model_dir)
  model_path = search_pb(saved_model_dir, False)

  pipeline_path = os.path.join(model_path, 'assets/pipeline.config')
  if not gfile.Exists(pipeline_path):
    missing_msg = '%s not exists' % pipeline_path
    logging.warning(missing_msg)
    raise ValueError(missing_msg)

  session = tf.Session(
      config=tf.ConfigProto(
          gpu_options=tf.GPUOptions(allow_growth=True),
          allow_soft_placement=True))

  # one table slice per worker host
  worker_count = len(FLAGS.worker_hosts.split(','))
  with DeepExplain(session=session) as de:
    explainer = OdpsRtpExplainer(de, model_path, 'deeplift')
    # explain_tables takes precedence over the tables passed by the pai command
    input_tables = FLAGS.explain_tables or FLAGS.tables
    explainer.feature_importance(
        input_tables,
        FLAGS.outputs,
        reserved_cols=FLAGS.reserved_cols,
        output_cols=FLAGS.output_cols,
        batch_size=FLAGS.batch_size,
        slice_id=FLAGS.task_index,
        slice_num=worker_count)
def main(_):
  """Print every flag value except the built-in help flags, then call `run`."""
  help_flags = ('h', 'help', 'helpshort', 'helpfull')
  for flag_name in FLAGS:
    if flag_name not in help_flags:
      print('%s=%s' % (flag_name, FLAGS[flag_name].value))

  run(FLAGS)
class AttributionMethod(object):
  """Attribution method base class.

  Holds the target tensor `T`, the input tensor(s) `X` and the session used to
  evaluate them, and provides the shared plumbing (input validation, batched
  session runs, baseline normalisation) used by the concrete methods.

  Args:
    T: target Tensor.
    X: input Tensor, or a list/tuple of input Tensors.
    session: session object whose `run(fetches, feed_dict)` is used.
    keras_learning_phase: optional placeholder; when set it is fed with 0.
  """

  def __init__(self, T, X, session, keras_learning_phase=None):
    self.T = T  # target Tensor
    self.X = X  # input Tensor (or list/tuple of input Tensors)
    self.Y_shape = [None] + T.get_shape().as_list()[1:]
    # Most often T contains multiple output units. In this case, it is often
    # necessary to select a single unit to compute contributions for. This is
    # achieved by passing 'ys' as a weight for the output Tensor.
    self.Y = tf.placeholder(tf.float32, self.Y_shape)
    self.T = self.T * self.Y
    self.symbolic_attribution = None
    self.session = session
    self.keras_learning_phase = keras_learning_phase
    self.has_multiple_inputs = isinstance(self.X, (list, tuple))
    logging.info('Model with multiple inputs: %s' % self.has_multiple_inputs)

    # Set baseline
    # TODO: this also sets a baseline for methods that do not require one
    self._set_check_baseline()

    # References
    self._init_references()

    # Create symbolic explanation once during construction
    # (affects only gradient-based methods)
    self.explain_symbolic()

  def explain_symbolic(self):
    """Build the symbolic attribution; the base class has none."""
    return None

  def run(self, xs, ys=None, batch_size=None):
    """Compute attributions for `xs`; implemented by subclasses."""
    pass

  def _init_references(self):
    """Hook for subclasses that need reference activations."""
    pass

  @staticmethod
  def _require_batch_dim(tensor, batch_size, role):
    """Raise RuntimeError if the static first dim of `tensor` != batch_size.

    An unknown (None) first dimension is always accepted. `role` is the word
    ('target' or 'input') inserted into the error message.
    """
    first_dim = tensor.shape[0].value
    # Compare by value: identity (`is not`) is only reliable for the small
    # integers CPython caches, so equal larger sizes would be rejected.
    if first_dim is not None and first_dim != batch_size:
      raise RuntimeError(
          'When using batch evaluation, the first dimension of the %s tensor '
          'must be compatible with the batch size. Found %s instead' %
          (role, first_dim))

  def _check_input_compatibility(self, xs, ys=None, batch_size=None):
    """Validate that xs / ys / batch_size are mutually consistent.

    Raises:
      RuntimeError: if `ys` does not match the sample count of `xs`, or if a
        positive `batch_size` conflicts with a statically known first
        dimension of the target or of any input tensor.
    """
    if ys is not None:
      if not self.has_multiple_inputs and len(xs) != len(ys):
        raise RuntimeError(
            'When provided, ys must have the same batch size as xs (xs has batch size {} and ys {})'
            .format(len(xs), len(ys)))
      elif self.has_multiple_inputs and np.all([len(i) != len(ys) for i in xs]):
        # NOTE(review): this only fires when *no* input matches len(ys),
        # although the message says "all elements" - confirm the intent.
        raise RuntimeError(
            'When provided, ys must have the same batch size as all elements of xs'
        )
    if batch_size is not None and batch_size > 0:
      self._require_batch_dim(self.T, batch_size, 'target')
      # Branch on has_multiple_inputs so tuples are handled like lists.
      inputs = self.X if self.has_multiple_inputs else [self.X]
      for x in inputs:
        self._require_batch_dim(x, batch_size, 'input')

  def _session_run_batch(self, T, xs, ys=None):
    """Run `T` in the session for a single batch and return the result."""
    feed_dict = {}
    if self.has_multiple_inputs:
      for k, v in zip(self.X, xs):
        feed_dict[k] = v
    else:
      feed_dict[self.X] = xs

    # If ys is not passed, produce a vector of ones that will be broadcast
    # to all batch samples
    if ys is not None:
      feed_dict[self.Y] = ys
    else:
      feed_dict[self.Y] = np.ones([1] + self.Y_shape[1:])

    if self.keras_learning_phase is not None:
      feed_dict[self.keras_learning_phase] = 0
    return self.session.run(T, feed_dict)

  def _session_run(self, T, xs, ys=None, batch_size=None):
    """Run `T` over `xs`, splitting into batches when `batch_size` asks for it.

    Without a positive `batch_size`, or when all samples fit in one batch, the
    data is evaluated in a single session call. Otherwise the per-batch
    outputs are written into pre-allocated arrays covering all samples.

    Raises:
      RuntimeError: if the number of input arrays differs from the number of
        input tensors, or batched inputs have different sample counts.
    """
    num_samples = len(xs)
    if self.has_multiple_inputs is True:
      num_samples = len(xs[0])
      if len(xs) != len(self.X):
        raise RuntimeError(
            'List of input tensors and input data have different lengths (%s and %s)'
            % (str(len(xs)), str(len(self.X))))
      if batch_size is not None:
        for xi in xs:
          if len(xi) != num_samples:
            raise RuntimeError(
                'Evaluation in batches requires all inputs to have '
                'the same number of samples')

    if batch_size is None or batch_size <= 0 or num_samples <= batch_size:
      return self._session_run_batch(T, xs, ys)

    outs = []
    batches = make_batches(num_samples, batch_size)
    for batch_index, (batch_start, batch_end) in enumerate(batches):
      # Get a batch from data
      xs_batch = slice_arrays(xs, batch_start, batch_end)
      # If the target tensor has one entry for each sample, batch it as well
      ys_batch = None
      if ys is not None:
        ys_batch = slice_arrays(ys, batch_start, batch_end)
      batch_outs = to_list(self._session_run_batch(T, xs_batch, ys_batch))
      if batch_index == 0:
        # Pre-allocate the results arrays.
        for batch_out in batch_outs:
          shape = (num_samples,) + batch_out.shape[1:]
          outs.append(np.zeros(shape, dtype=batch_out.dtype))
      for i, batch_out in enumerate(batch_outs):
        outs[i][batch_start:batch_end] = batch_out
    return unpack_singleton(outs)

  def _set_check_baseline(self):
    """Create a zero baseline, or validate and batch-expand a provided one.

    Does nothing when the instance has no `baseline` attribute. A provided
    baseline must match the input shape without the batch dimension; a leading
    axis of size 1 is then added.

    Raises:
      RuntimeError: if a provided baseline has the wrong shape.
    """
    # Do nothing for those methods that have no baseline required
    if not hasattr(self, 'baseline'):
      return

    if self.baseline is None:
      if self.has_multiple_inputs:
        self.baseline = [
            np.zeros([1] + xi.get_shape().as_list()[1:]) for xi in self.X
        ]
      else:
        self.baseline = np.zeros([1] + self.X.get_shape().as_list()[1:])
      return

    if self.has_multiple_inputs:
      # Build a fresh list so the caller's sequence is not mutated in place
      # (this also allows a tuple of baselines).
      expanded = []
      for i, xi in enumerate(self.X):
        expected = xi.get_shape().as_list()[1:]
        if list(self.baseline[i].shape) != expected:
          raise RuntimeError(
              'Baseline shape %s does not match expected shape %s' %
              (self.baseline[i].shape, expected))
        expanded.append(np.expand_dims(self.baseline[i], 0))
      self.baseline = expanded
    else:
      expected = self.X.get_shape().as_list()[1:]
      if list(self.baseline.shape) != expected:
        raise RuntimeError(
            'Baseline shape %s does not match expected shape %s' %
            (self.baseline.shape, expected))
      self.baseline = np.expand_dims(self.baseline, 0)
class GradientXInput(GradientBasedMethod):
  """Gradient * Input: multiply each input by the gradient of T w.r.t. it."""

  def get_symbolic_attribution(self):
    """Return a list with one `gradient * input` tensor per input."""
    gradients = tf.gradients(self.T, self.X)
    # A single input is wrapped so it pairs with tf.gradients' list result.
    inputs = self.X if self.has_multiple_inputs else [self.X]
    return [gradient * tensor for gradient, tensor in zip(gradients, inputs)]
class EpsilonLRP(GradientBasedMethod):
  """Layer-wise Relevance Propagation with the epsilon rule.

  The stabiliser is stored on the class because the gradient override is a
  classmethod and has no access to instance state.

  Args:
    T: target Tensor.
    X: input Tensor or list of input Tensors.
    session: session used for evaluation.
    keras_learning_phase: optional learning-phase placeholder.
    epsilon: stabiliser added to the denominator; must be > 0.
  """

  # Stabiliser used by `nonlinearity_grad_override`; set in __init__.
  eps = None

  def __init__(self, T, X, session, keras_learning_phase, epsilon=1e-4):
    assert epsilon > 0.0, 'LRP epsilon must be greater than zero'
    # Store on the class attribute declared above rather than in an implicit
    # module-level global. It must be set before the base constructor runs,
    # because that constructor builds the symbolic attribution.
    EpsilonLRP.eps = epsilon
    super(EpsilonLRP, self).__init__(T, X, session, keras_learning_phase)

  def get_symbolic_attribution(self):
    """Return a list with one `gradient * input` tensor per input."""
    inputs = self.X if self.has_multiple_inputs else [self.X]
    return [g * x for g, x in zip(tf.gradients(self.T, self.X), inputs)]

  @classmethod
  def nonlinearity_grad_override(cls, op, grad):
    """Replace the op's gradient with `grad * output / (input +- eps)`.

    The sign of eps follows the sign of the op input (non-negative -> +eps,
    negative -> -eps), which keeps the denominator away from zero.
    """
    op_output = op.outputs[0]
    op_input = op.inputs[0]
    signed_ones = tf.where(op_input >= 0, tf.ones_like(op_input),
                           -1 * tf.ones_like(op_input))
    return grad * op_output / (op_input + cls.eps * signed_ones)
self.has_multiple_inputs else [self.baseline]) + ] + + @classmethod + def nonlinearity_grad_override(cls, op, grad): + output = op.outputs[0] + input = op.inputs[0] + ref_input = cls._deeplift_ref[op.name] + ref_output = activation(op.type)(ref_input) + delta_out = output - ref_output + delta_in = input - ref_input + instant_grad = activation(op.type)(0.5 * (ref_input + input)) + return tf.where( + tf.abs(delta_in) > 1e-5, grad * delta_out / delta_in, + original_grad(instant_grad.op, grad)) + + def _init_references(self): + # print ('DeepLIFT: computing references...') + sys.stdout.flush() + self._deeplift_ref.clear() + ops = [] + g = tf.get_default_graph() + for op in g.get_operations(): + if len(op.inputs) > 0 and not op.name.startswith('gradients'): + if op.type in SUPPORTED_ACTIVATIONS: + ops.append(op) + YR = self._session_run([o.inputs[0] for o in ops], self.baseline) + for (r, op) in zip(YR, ops): + self._deeplift_ref[op.name] = r + # print('DeepLIFT: references ready') + sys.stdout.flush() + + +""" +Occlusion method +Generalization of the grey-box method presented in https://arxiv.org/pdf/1311.2901.pdf +This method performs a systematic perturbation of contiguous hyperpatches in the input, +replacing each patch with a user-defined value (by default 0). +window_shape : integer or tuple of length xs_ndim +Defines the shape of the elementary n-dimensional orthotope the rolling window view. +If an integer is given, the shape will be a hypercube of sidelength given by its value. +step : integer or tuple of length xs_ndim +Indicates step size at which extraction shall be performed. +If integer is given, then the step is uniform in all dimensions. 
class Occlusion(PerturbationBasedMethod):
  """Occlusion attribution: replace input patches and measure output change.

  Args:
    T: target Tensor.
    X: single input Tensor (multiple inputs are rejected).
    session: session used for evaluation.
    keras_learning_phase: optional learning-phase placeholder.
    window_shape: sequence with one entry per non-batch input dimension;
      defaults to a window of size 1 in every dimension.
    step: int, or sequence with one entry per non-batch input dimension;
      defaults to 1.

  Raises:
    RuntimeError: if `X` is a list/tuple of inputs.
  """

  def __init__(self,
               T,
               X,
               session,
               keras_learning_phase,
               window_shape=None,
               step=None):
    super(Occlusion, self).__init__(T, X, session, keras_learning_phase)
    if self.has_multiple_inputs:
      raise RuntimeError(
          'Multiple inputs not yet supported for perturbation methods')

    # Shape of one sample: indexing with [0] drops the batch dimension.
    input_shape = X[0].get_shape().as_list()
    if window_shape is not None:
      assert len(window_shape) == len(input_shape), \
          'window_shape must have length of input (%d)' % len(input_shape)
      self.window_shape = tuple(window_shape)
    else:
      self.window_shape = (1,) * len(input_shape)

    if step is not None:
      assert isinstance(step, int) or len(step) == len(input_shape), \
          'step must be integer or tuple with the length of input (%d)' % len(input_shape)
      self.step = step
    else:
      self.step = 1
    self.replace_value = 0.0
    logging.info('Input shape: %s; window_shape %s; step %s' %
                 (input_shape, self.window_shape, self.step))

  def run(self, xs, ys=None, batch_size=None):
    """Return an attribution array with the same shape as `xs`.

    Each window of the input is multiplied by `replace_value` in turn; the
    summed drop in the target is credited to the window's positions and
    finally divided by the accumulated per-position weight.

    Args:
      xs: numpy array of input samples, batch dimension first.
      ys: optional per-sample weights for the target tensor.
      batch_size: optional evaluation batch size, forwarded to the session
        runner so large inputs are evaluated in chunks.
    """
    self._check_input_compatibility(xs, ys, batch_size)
    input_shape = xs.shape[1:]
    # Keep the sample count separate from `batch_size` so that the caller's
    # batch size is honoured instead of being silently overwritten.
    num_samples = xs.shape[0]
    # np.asscalar was removed from NumPy; int() gives a plain Python int.
    total_dim = int(np.prod(input_shape))

    # Create mask
    index_matrix = np.arange(total_dim).reshape(input_shape)
    idx_patches = view_as_windows(index_matrix, self.window_shape,
                                  self.step).reshape((-1,) + self.window_shape)
    heatmap = np.zeros_like(xs, dtype=np.float32).reshape((-1), total_dim)
    w = np.zeros_like(heatmap)

    # Compute original output
    eval0 = self._session_run(self.T, xs, ys, batch_size)

    # Start perturbation loop
    for patch in idx_patches:
      patch_idx = patch.flatten()
      mask = np.ones(input_shape).flatten()
      mask[patch_idx] = self.replace_value
      masked_xs = mask.reshape((1,) + input_shape) * xs
      delta = eval0 - self._session_run(self.T, masked_xs, ys, batch_size)
      delta_aggregated = np.sum(
          delta.reshape((num_samples, -1)), -1, keepdims=True)
      heatmap[:, patch_idx] += delta_aggregated
      w[:, patch_idx] += patch.size

    # Positions never covered by a window have weight 0 and produce NaN.
    attribution = np.reshape(heatmap / w, xs.shape)
    if np.isnan(attribution).any():
      warnings.warn(
          'Attributions generated by Occlusion method contain nans, '
          'probably because window_shape and step do not allow to cover the all input.'
      )
    return attribution
class DeepExplain(object):
  """Context manager that builds attribution explainers for a graph.

  While the context is active, the graph's gradient override map redirects
  the supported activations to the 'DeepExplainGrad' gradient function.

  Args:
    graph: graph to operate on; defaults to `session.graph`.
    session: session used for evaluation; defaults to the session that is the
      default at construction time.

  Raises:
    RuntimeError: if no session is given and none is the default.
  """

  def __init__(self, graph=None, session=None):
    # Resolve the default session at call time. A default argument of
    # `tf.get_default_session()` is evaluated once, at import.
    if session is None:
      session = tf.get_default_session()
    # Fail with the intended error before touching `session.graph`.
    if session is None:
      raise RuntimeError(
          'DeepExplain: could not retrieve a session. Use DeepExplain(session=your_session).'
      )
    self.method = None
    self.batch_size = None
    self.session = session
    self.graph = session.graph if graph is None else graph
    self.graph_context = self.graph.as_default()
    self.override_context = self.graph.gradient_override_map(
        self.get_override_map())
    self.keras_phase_placeholder = None
    self.context_on = False

  def __enter__(self):
    # Override gradient of all ops created in context
    self.graph_context.__enter__()
    self.override_context.__enter__()
    self.context_on = True
    return self

  def __exit__(self, type, value, traceback):
    # Leave the contexts in the reverse order of entry.
    self.override_context.__exit__(type, value, traceback)
    self.graph_context.__exit__(type, value, traceback)
    self.context_on = False

  def get_explainer(self, method, T, X, **kwargs):
    """Instantiate the attribution method registered under `method`.

    Args:
      method: key of `attribution_methods`.
      T: target Tensor.
      X: input Tensor, or list/tuple of input Tensors.
      **kwargs: forwarded to the attribution method's constructor.

    Returns:
      The constructed attribution method instance.

    Raises:
      RuntimeError: if called outside the context, if `method` is unknown,
        or if `T` / `X` do not look like Tensor objects.
    """
    global _ENABLED_METHOD_CLASS, _GRAD_OVERRIDE_CHECKFLAG
    if not self.context_on:
      raise RuntimeError(
          'Explain can be called only within a DeepExplain context.')
    self.method = method
    if self.method in attribution_methods:
      method_class, method_flag = attribution_methods[self.method]
    else:
      raise RuntimeError('Method must be in %s' %
                         list(attribution_methods.keys()))
    if isinstance(X, (list, tuple)):
      for x in X:
        if 'tensor' not in str(type(x)).lower():
          raise RuntimeError(
              'If a list, X must contain only Tensorflow Tensor objects')
    else:
      if 'tensor' not in str(type(X)).lower():
        raise RuntimeError(
            'X must be a Tensorflow Tensor object or a list of them')

    if 'tensor' not in str(type(T)).lower():
      raise RuntimeError('T must be a Tensorflow Tensor object')

    logging.info('DeepExplain: running "%s" explanation method (%d)' %
                 (self.method, method_flag))
    self._check_ops()
    _GRAD_OVERRIDE_CHECKFLAG = 0

    _ENABLED_METHOD_CLASS = method_class
    try:
      explainer = method_class(
          T,
          X,
          self.session,
          keras_learning_phase=self.keras_phase_placeholder,
          **kwargs)

      # The registered gradient function sets the flag when it is invoked.
      # If it is still 0 here, the override was never hit during construction.
      if issubclass(method_class,
                    GradientBasedMethod) and _GRAD_OVERRIDE_CHECKFLAG == 0:
        warnings.warn(
            'DeepExplain detected you are trying to use an attribution method that requires '
            'gradient override but the original gradient was used instead. You might have forgot to '
            '(re)create your graph within the DeepExlain context. Results are not reliable!'
        )
    finally:
      # Always reset the module-level state, even if construction failed,
      # so a later call does not see a stale enabled method.
      _ENABLED_METHOD_CLASS = None
      _GRAD_OVERRIDE_CHECKFLAG = 0
      self.keras_phase_placeholder = None
    return explainer

  def explain(self, method, T, X, xs, ys=None, batch_size=None, **kwargs):
    """Build the explainer for `method` and run it on `xs`."""
    explainer = self.get_explainer(method, T, X, **kwargs)
    return explainer.run(xs, ys, batch_size)

  @staticmethod
  def get_override_map():
    """Map every supported activation op type to 'DeepExplainGrad'."""
    return dict((a, 'DeepExplainGrad') for a in SUPPORTED_ACTIVATIONS)

  def _check_ops(self):
    """Heuristically check if any op is in the list of unsupported activation functions.

    This does not cover all cases where explanation methods would fail, and
    must be improved in the future. Also, check if the placeholder named
    'keras_learning_phase' exists in the graph. This is used by Keras and
    needs to be passed in feed_dict.
    """
    g = tf.get_default_graph()
    for op in g.get_operations():
      if len(op.inputs) > 0 and not op.name.startswith('gradients'):
        if op.type in UNSUPPORTED_ACTIVATIONS:
          warnings.warn('Detected unsupported activation (%s). '
                        'This might lead to unexpected or wrong results.' %
                        op.type)
      elif 'keras_learning_phase' in op.name:
        self.keras_phase_placeholder = op.outputs[0]
class FSCD(object):
  """Feature-selection driver used when `model_type` is 'fscd'.

  For every feature group it obtains a feature-importance mapping, dumps it
  to CSV (and optionally to a PNG chart), then writes a copy of the pipeline
  config - and of the fg json, if given - with low-ranked features removed.
  """

  def __init__(self,
               config_path,
               output_dir,
               topk,
               checkpoint_path=None,
               fg_path=None,
               visualize=False):
    """Store the settings and create `output_dir` if it does not exist.

    Args:
      config_path: path of the pipeline config file to read.
      output_dir: directory that receives the csv/png/config outputs.
      topk: number of leading features to keep per feature group.
      checkpoint_path: stored on the instance; not read by the methods
        of this class.
      fg_path: optional path of an fg json file to filter as well.
      visualize: whether to also draw the importance chart.
    """
    self._config_path = config_path
    self._output_dir = output_dir
    self._topk = topk
    if not tf.gfile.Exists(self._output_dir):
      tf.gfile.MakeDirs(self._output_dir)
    self._checkpoint_path = checkpoint_path
    self._fg_path = fg_path
    self._visualize = visualize

  def process(self):
    """Compute, dump and apply feature importance for every feature group."""
    tf.logging.info('Loading delta of FSCD layer ...')
    config = config_util.get_configs_from_pipeline_file(self._config_path)
    # NOTE(review): the check is on the 'variational_dropout' field although
    # this is the FSCD path - presumably FSCD shares that config; confirm.
    assert config.model_config.HasField(
        'variational_dropout'), 'variational_dropout must be in model_config'

    feature_importance_map = {}
    # Groups for which no importance was returned; their features are
    # protected from exclusion in _process_config.
    white_feature_group = set()
    from easy_rec.python.layers.fscd_layer import get_feature_importance
    for feature_group in config.model_config.feature_groups:
      group_name = feature_group.group_name
      tf.logging.info('Calculating %s feature importance ...' % group_name)
      feature_importance = get_feature_importance(config, group_name)
      if len(feature_importance) == 0:
        tf.logging.info('No feature importance in group %s' % group_name)
        white_feature_group.add(group_name)
        continue
      feature_importance_map[group_name] = feature_importance

      tf.logging.info('Dump %s feature importance to csv ...' % group_name)
      self._dump_to_csv(feature_importance, group_name)

      if self._visualize:
        tf.logging.info('Visualizing %s feature importance ...' % group_name)
        self._visualize_feature_importance(feature_importance, group_name)

    tf.logging.info('Processing model config ...')
    self._process_config(feature_importance_map, white_feature_group)

  def _dump_to_csv(self, feature_importance, group_name):
    """Dump feature importance data to a csv file.

    Writes `feature_importance_<group_name>.csv` under the output directory
    with the columns `feature_name` and `importance`.
    """
    with tf.gfile.Open(
        os.path.join(self._output_dir,
                     'feature_importance_%s.csv' % group_name), 'w') as f:
      df = pd.DataFrame(
          columns=['feature_name', 'importance'],
          data=[list(kv) for kv in feature_importance.items()])
      df.to_csv(f, encoding='gbk')

  def _visualize_feature_importance(self, feature_importance, group_name):
    """Draw feature importance histogram.

    Saves the chart as `feature_importance_pic_<group_name>.png` under the
    output directory.
    """
    df = pd.DataFrame(
        columns=['feature_name', 'importance'],
        data=[list(kv) for kv in feature_importance.items()])
    # The 'color' column is assigned here but not read again in this method.
    df['color'] = ['red' if x < 0.5 else 'green' for x in df['importance']]
    df.sort_values('importance', inplace=True, ascending=False)
    df.reset_index(inplace=True)
    # Draw plot
    plt.figure(figsize=(90, 200), dpi=100)
    plt.hlines(y=df.index, xmin=0, xmax=df.importance)
    # Label the end of each bar with its importance rounded to 2 decimals.
    for x, y, tex in zip(df.importance, df.index, df.importance):
      plt.text(
          x,
          y,
          round(tex, 2),
          horizontalalignment='right' if x < 0 else 'left',
          verticalalignment='center',
          fontdict={
              'color': 'red' if x < 0 else 'green',
              'size': 14
          })
    # Decorations
    plt.yticks(df.index, df.feature_name, fontsize=20)
    plt.title('Feature Importance', fontdict={'size': 30})
    plt.grid(linestyle='--', alpha=0.5)
    plt.xlim(0, 1)
    with tf.gfile.GFile(
        os.path.join(self._output_dir,
                     'feature_importance_pic_%s.png' % group_name), 'wb') as f:
      plt.savefig(f, format='png')

  def _process_config(self, feature_importance_map, white_feature_group):
    """Process model config and fg config with feature selection.

    Args:
      feature_importance_map: group name -> feature-importance mapping.
      white_feature_group: names of groups whose features are always kept.
    """
    # Everything at position >= topk in a group's mapping is a candidate for
    # exclusion. This relies on the iteration order of the mapping -
    # presumably most important first; verify against get_feature_importance.
    excluded_features = set()
    for group_name, feature_importance in feature_importance_map.items():
      for i, (feature_name, _) in enumerate(feature_importance.items()):
        if i >= self._topk:
          excluded_features.add(feature_name)

    config = config_util.get_configs_from_pipeline_file(self._config_path)
    # keep sequence features and side-infos
    sequence_features = set()
    for feature_group in config.model_config.feature_groups:
      for sequence_feature in feature_group.sequence_features:
        for seq_att_map in sequence_feature.seq_att_map:
          for key in seq_att_map.key:
            sequence_features.add(key)
          for hist_seq in seq_att_map.hist_seq:
            sequence_features.add(hist_seq)
    # compat with din
    for sequence_feature in config.model_config.seq_att_groups:
      for seq_att_map in sequence_feature.seq_att_map:
        for key in seq_att_map.key:
          sequence_features.add(key)
        for hist_seq in seq_att_map.hist_seq:
          sequence_features.add(hist_seq)
    # sequence feature group
    # (every feature of a white-listed group is protected as well)
    for feature_group in config.model_config.feature_groups:
      group_name = feature_group.group_name
      if group_name not in white_feature_group:
        continue
      for feature_name in feature_group.feature_names:
        sequence_features.add(feature_name)

    excluded_features = excluded_features - sequence_features

    # Excluded features stay in the config but are switched to ConstFeature.
    for feature_config in config_util.get_compatible_feature_configs(config):
      feature_name = feature_config.input_names[0]
      if feature_config.HasField('feature_name'):
        feature_name = feature_config.feature_name
      if feature_name in excluded_features:
        feature_config.feature_type = FeatureConfig.FeatureType.ConstFeature

    config.model_config.ClearField('variational_dropout')
    # The edited config is saved under the output dir with its original
    # file name.
    config_util.save_message(
        config,
        os.path.join(self._output_dir, os.path.basename(self._config_path)))

    if self._fg_path is not None and len(self._fg_path) > 0:
      with tf.gfile.Open(self._fg_path) as f:
        fg_json = json.load(f, object_pairs_hook=OrderedDict)
        # Drop named features that were excluded; entries without a
        # 'feature_name' key are always kept.
        features = []
        for feature in fg_json['features']:
          if 'feature_name' in feature:
            if feature['feature_name'] not in excluded_features:
              features.append(feature)
          else:
            features.append(feature)
        fg_json['features'] = features

      fg_file = os.path.join(self._output_dir, os.path.basename(self._fg_path))
      with tf.gfile.Open(fg_file, 'w') as f:
        json.dump(fg_json, f, indent=4)
GFile(args.output, 'w') as fout: + fout.write(text_format.MessageToString(saved_model, as_utf8=True)) + else: + with GFile(args.output, 'wb') as fout: + fout.write(saved_model.SerializeToString()) diff --git a/easy_rec/python/train_eval.py b/easy_rec/python/train_eval.py index bdb65eb0a..f12784ac1 100644 --- a/easy_rec/python/train_eval.py +++ b/easy_rec/python/train_eval.py @@ -95,8 +95,12 @@ help='is use check mode') parser.add_argument( '--selected_cols', type=str, default=None, help='select input columns') + parser.add_argument('--gpu', type=str, default=None, help='gpu id') args, extra_args = parser.parse_known_args() + if args.gpu is not None: + os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu + edit_config_json = {} if args.edit_config_json: edit_config_json = json.loads(args.edit_config_json) diff --git a/easy_rec/python/utils/__init__.py b/easy_rec/python/utils/__init__.py index e69de29bb..09dc89476 100644 --- a/easy_rec/python/utils/__init__.py +++ b/easy_rec/python/utils/__init__.py @@ -0,0 +1,15 @@ +class conditional(object): + """Wrap another context manager and enter it only if condition is true.""" + + def __init__(self, condition, contextmanager): + self.condition = condition + self.contextmanager = contextmanager + + def __enter__(self): + """Conditionally enter a context manager.""" + if self.condition: + return self.contextmanager.__enter__() + + def __exit__(self, *args): + if self.condition: + return self.contextmanager.__exit__(*args) diff --git a/easy_rec/python/utils/config_util.py b/easy_rec/python/utils/config_util.py index b63a02f71..e35175be9 100644 --- a/easy_rec/python/utils/config_util.py +++ b/easy_rec/python/utils/config_util.py @@ -5,6 +5,7 @@ Such as Hyper parameter tuning or automatic feature expanding. 
""" +import argparse import datetime import json import logging @@ -605,3 +606,144 @@ def process_multi_file_input_path(sampler_config_input_path): input_path = sampler_config_input_path return input_path + + +def change_configured_embedding_dim(pipeline_config_path, groups, emb_dim): + """Change the embedding dimension of the features in groups. + + Args: + pipeline_config_path: Path to pipeline_pb2.EasyRecConfig text + proto. + groups: the names of feature group to be changed + emb_dim: target embedding dimension + + Returns: + Dictionary of configuration objects. Keys are `model`, `train_config`, + `train_input_config`, `eval_config`, `eval_input_config`. Value are the + corresponding config objects. + """ + pipeline_config = get_configs_from_pipeline_file(pipeline_config_path, False) + + target_groups = set(groups.split(',')) + features = set() + conf = pipeline_config.model_config + for group in conf.feature_groups: + if group.group_name not in target_groups: + continue + for feature in group.feature_names: + features.add(feature) + + feature_configs = get_compatible_feature_configs(pipeline_config) + for fea_conf in feature_configs: + fea_name = fea_conf.input_names[0] + if fea_conf.HasField('feature_name'): + fea_name = fea_conf.feature_name + if fea_name in features: + fea_conf.embedding_dim = emb_dim + + return pipeline_config + + +def remove_redundant_config(pipeline_config_path, remove_input=False): + """Remove redundant configs from a file containing pipeline_pb2.EasyRecConfig. + + Args: + pipeline_config_path: Path to pipeline_pb2.EasyRecConfig text + proto. + remove_input: whether to remove input configs + + Returns: + Dictionary of configuration objects. Keys are `model`, `train_config`, + `train_input_config`, `eval_config`, `eval_input_config`. Value are the + corresponding config objects. 
+ """ + pipeline_config = get_configs_from_pipeline_file(pipeline_config_path, False) + + inputs = set() + features = set() + conf = pipeline_config.model_config + for group in conf.feature_groups: + for feature in group.feature_names: + features.add(feature) + + feature_configs = get_compatible_feature_configs(pipeline_config) + offset = 0 + for i in range(len(feature_configs)): + fea_conf = feature_configs[i - offset] + fea_name = fea_conf.input_names[0] + if fea_conf.HasField('feature_name'): + fea_name = fea_conf.feature_name + if fea_name not in features: + logging.info("redundant feature:" + fea_name) + del feature_configs[i - offset] + offset += 1 + elif remove_input: + for input_name in fea_conf.input_names: + inputs.add(input_name) + + if remove_input: + for label in pipeline_config.data_config.label_fields: + inputs.add(label) + input_fields = pipeline_config.data_config.input_fields + offset = 0 + for i in range(len(input_fields)): + field = input_fields[i - offset] + if field.input_name not in inputs: + del input_fields[i - offset] + offset += 1 + return pipeline_config + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + '--cmd', + type=str, + choices=['format', 'set_emb_dim', 'rm_redundancy'], + required=True, + help='Path to pipeline config file.') + parser.add_argument( + '-c', '--pipeline_config_path', + type=str, + default=None, + required=True, + help='Path to pipeline config file.') + parser.add_argument( + '-g', '--feature_groups', + type=str, + default=None, + help='The name of feature group to be changed.') + parser.add_argument( + '--rm_input', + type=bool, + default=False, + help='Whether to remove redundancy input.') + parser.add_argument( + '-d', '--embedding_dim', + type=int, + default=None, + help='The embedding dim to be changed to.') + parser.add_argument( + '-o', '--save_config_path', + type=str, + default=None, + required=True, + help='Path to save changed config.') + + args, extra_args = 
parser.parse_known_args() + if args.cmd == 'format': + config = get_configs_from_pipeline_file(args.pipeline_config_path) + save_message(config, args.save_config_path) + elif args.cmd == 'set_emb_dim': + if args.feature_groups is None: + raise ValueError('--feature_groups must be set') + if args.embedding_dim is None: + raise ValueError('--embedding_dim must be set') + + config = change_configured_embedding_dim(args.pipeline_config_path, + args.feature_groups, + args.embedding_dim) + save_message(config, args.save_config_path) + elif args.cmd == 'rm_redundancy': + config = remove_redundant_config(args.pipeline_config_path) + save_message(config, args.save_config_path) diff --git a/easy_rec/python/utils/dag.py b/easy_rec/python/utils/dag.py new file mode 100644 index 000000000..00646f732 --- /dev/null +++ b/easy_rec/python/utils/dag.py @@ -0,0 +1,205 @@ +from collections import OrderedDict +from collections import defaultdict +from copy import copy +from copy import deepcopy + + +class DAG(object): + """Directed acyclic graph implementation.""" + + def __init__(self): + """Construct a new DAG with no nodes or edges.""" + self.reset_graph() + + def add_node(self, node_name, graph=None): + """Add a node if it does not exist yet, or error out.""" + if not graph: + graph = self.graph + if node_name in graph: + raise KeyError('node %s already exists' % node_name) + graph[node_name] = set() + + def add_node_if_not_exists(self, node_name, graph=None): + try: + self.add_node(node_name, graph=graph) + except KeyError: + pass + + def delete_node(self, node_name, graph=None): + """Deletes this node and all edges referencing it.""" + if not graph: + graph = self.graph + if node_name not in graph: + raise KeyError('node %s does not exist' % node_name) + graph.pop(node_name) + + for node, edges in graph.items(): + if node_name in edges: + edges.remove(node_name) + + def delete_node_if_exists(self, node_name, graph=None): + try: + self.delete_node(node_name, graph=graph) + except 
KeyError: + pass + + def add_edge(self, ind_node, dep_node, graph=None): + """Add an edge (dependency) between the specified nodes.""" + if not graph: + graph = self.graph + if ind_node not in graph or dep_node not in graph: + raise KeyError('one or more nodes do not exist in graph') + test_graph = deepcopy(graph) + test_graph[ind_node].add(dep_node) + is_valid, message = self.validate(test_graph) + if is_valid: + graph[ind_node].add(dep_node) + else: + raise Exception() + + def delete_edge(self, ind_node, dep_node, graph=None): + """Delete an edge from the graph.""" + if not graph: + graph = self.graph + if dep_node not in graph.get(ind_node, []): + raise KeyError('this edge does not exist in graph') + graph[ind_node].remove(dep_node) + + def rename_edges(self, old_task_name, new_task_name, graph=None): + """Change references to a task in existing edges.""" + if not graph: + graph = self.graph + for node, edges in graph.items(): + + if node == old_task_name: + graph[new_task_name] = copy(edges) + del graph[old_task_name] + + else: + if old_task_name in edges: + edges.remove(old_task_name) + edges.add(new_task_name) + + def predecessors(self, node, graph=None): + """Returns a list of all predecessors of the given node.""" + if graph is None: + graph = self.graph + return [key for key in graph if node in graph[key]] + + def downstream(self, node, graph=None): + """Returns a list of all nodes this node has edges towards.""" + if graph is None: + graph = self.graph + if node not in graph: + raise KeyError('node %s is not in graph' % node) + return list(graph[node]) + + def all_downstreams(self, node, graph=None): + """Returns a list of all nodes ultimately downstream of the given node in the dependency graph. + + in topological order. 
+ """ + if graph is None: + graph = self.graph + nodes = [node] + nodes_seen = set() + i = 0 + while i < len(nodes): + downstreams = self.downstream(nodes[i], graph) + for downstream_node in downstreams: + if downstream_node not in nodes_seen: + nodes_seen.add(downstream_node) + nodes.append(downstream_node) + i += 1 + return list( + filter(lambda node: node in nodes_seen, + self.topological_sort(graph=graph))) + + def all_leaves(self, graph=None): + """Return a list of all leaves (nodes with no downstreams).""" + if graph is None: + graph = self.graph + return [key for key in graph if not graph[key]] + + def from_dict(self, graph_dict): + """Reset the graph and build it from the passed dictionary. + + The dictionary takes the form of {node_name: [directed edges]} + """ + self.reset_graph() + for new_node in graph_dict.keys(): + self.add_node(new_node) + for ind_node, dep_nodes in graph_dict.items(): + if not isinstance(dep_nodes, list): + raise TypeError('dict values must be lists') + for dep_node in dep_nodes: + self.add_edge(ind_node, dep_node) + + def reset_graph(self): + """Restore the graph to an empty state.""" + self.graph = OrderedDict() + + def ind_nodes(self, graph=None): + """Returns a list of all nodes in the graph with no dependencies.""" + if graph is None: + graph = self.graph + + dependent_nodes = set( + node for dependents in graph.values() for node in dependents) + return [node for node in graph.keys() if node not in dependent_nodes] + + def validate(self, graph=None): + """Returns (Boolean, message) of whether DAG is valid.""" + graph = graph if graph is not None else self.graph + if len(self.ind_nodes(graph)) == 0: + return False, 'no independent nodes detected' + try: + self.topological_sort(graph) + except ValueError: + return False, 'failed topological sort' + return True, 'valid' + + def topological_sort(self, graph=None): + """Returns a topological ordering of the DAG. + + Raises an error if this is not possible (graph is not valid). 
+ """ + if graph is None: + graph = self.graph + result = [] + in_degree = defaultdict(lambda: 0) + + for u in graph: + for v in graph[u]: + in_degree[v] += 1 + ready = [node for node in graph if not in_degree[node]] + + while ready: + u = ready.pop() + result.append(u) + for v in graph[u]: + in_degree[v] -= 1 + if in_degree[v] == 0: + ready.append(v) + + if len(result) == len(graph): + return result + else: + raise ValueError('graph is not acyclic') + + def size(self): + return len(self.graph) + + +if __name__ == '__main__': + dag = DAG() + dag.add_node('a') + dag.add_node('b') + dag.add_node('c') + dag.add_node('d') + dag.add_edge('a', 'b') + dag.add_edge('a', 'd') + dag.add_edge('b', 'c') + print(dag.topological_sort()) + print(dag.graph) + print(dag.all_downstreams('b')) diff --git a/easy_rec/python/utils/load_class.py b/easy_rec/python/utils/load_class.py index 2da1e4e41..9ac749c76 100644 --- a/easy_rec/python/utils/load_class.py +++ b/easy_rec/python/utils/load_class.py @@ -220,3 +220,30 @@ def create_class(cls, name): return newclass return RegisterABCMeta + + +def load_keras_layer(name): + """Load keras layer class. + + Args: + name: keras layer name + + Return: + (layer_class, is_customize) + """ + name = name.strip() + if name == '' or name is None: + return None + + path = 'easy_rec.python.layers.keras.' + name + try: + cls = pydoc.locate(path) + if cls is not None: + return cls, True + path = 'tensorflow.keras.layers.' 
+ name + return pydoc.locate(path), False + except pydoc.ErrorDuringImport: + print('load keras layer %s failed' % name) + logging.error('load keras layer %s failed: %s' % + (name, traceback.format_exc())) + return None, False diff --git a/easy_rec/python/utils/tf_utils.py b/easy_rec/python/utils/tf_utils.py index 20e19496c..160a2f67a 100644 --- a/easy_rec/python/utils/tf_utils.py +++ b/easy_rec/python/utils/tf_utils.py @@ -33,3 +33,51 @@ def get_col_type(tf_type): } assert tf_type in type_map, 'invalid type: %s' % tf_type return type_map[tf_type] + + +def get_config_type(tf_type): + type_map = { + tf.int32: DatasetConfig.INT32, + tf.int64: DatasetConfig.INT64, + tf.string: DatasetConfig.STRING, + tf.bool: DatasetConfig.BOOL, + tf.float32: DatasetConfig.FLOAT, + tf.double: DatasetConfig.DOUBLE + } + assert tf_type in type_map, 'invalid type: %s' % tf_type + return type_map[tf_type] + + +# def add_op(inputs): +# if not isinstance(inputs, list): +# return inputs +# if len(inputs) == 1: +# if isinstance(inputs[0], list): +# return tf.keras.layers.Add()(inputs[0]) +# return inputs[0] +# return tf.keras.layers.Add()(inputs) + +# def dot_op(features): +# """Compute inner dot between any two pair tensors. +# +# Args: +# features: must be one of +# - List of 2D tensor with shape: ``(batch_size,embedding_size)``. +# - Or a 3D tensor with shape: ``(batch_size,field_size,embedding_size)`` +# Return: +# - 2D tensor with shape: ``(batch_size, 1)``. 
+# """ +# if isinstance(features, (list, tuple)): +# features = tf.stack(features, axis=1) +# assert features.shape.ndims == 3, 'input of dot func must be a 3D tensor or a list of 2D tensors' +# +# batch_size = tf.shape(features)[0] +# matrixdot = tf.matmul(features, features, transpose_b=True) +# feature_dim = matrixdot.shape[-1] +# +# ones_mat = tf.ones_like(matrixdot) +# lower_tri_mat = ones_mat - tf.linalg.band_part(ones_mat, 0, -1) +# lower_tri_mask = tf.cast(lower_tri_mat, tf.bool) +# result = tf.boolean_mask(matrixdot, lower_tri_mask) +# output_dim = feature_dim * (feature_dim - 1) // 2 +# return tf.reshape(result, (batch_size, output_dim)) diff --git a/easy_rec/version.py b/easy_rec/version.py index f70f1bfba..520cefe3d 100644 --- a/easy_rec/version.py +++ b/easy_rec/version.py @@ -1,3 +1,3 @@ # -*- encoding:utf-8 -*- # Copyright (c) Alibaba, Inc. and its affiliates. -__version__ = '0.6.3' +__version__ = '1.0.0' diff --git a/examples/configs/dcn_backbone_on_movielens.config b/examples/configs/dcn_backbone_on_movielens.config new file mode 100644 index 000000000..3376db96f --- /dev/null +++ b/examples/configs/dcn_backbone_on_movielens.config @@ -0,0 +1,203 @@ +train_input_path: "examples/data/movielens_1m/movies_train_data" +eval_input_path: "examples/data/movielens_1m/movies_test_data" +model_dir: "examples/ckpt/dcn_on_movieslen" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 2000 + sync_replicas: false +} + +eval_config { + metrics_set: { + auc {} + } + metrics_set: { + gauc { + uid_field: 'user_id' + } + } + metrics_set: { + max_f1 {} + } +} + +data_config { + input_fields { + input_name:'label' + input_type: INT32 + } + input_fields { + input_name:'user_id' + input_type: INT32 + } + 
input_fields { + input_name: 'movie_id' + input_type: INT32 + } + input_fields { + input_name:'rating' + input_type: INT32 + } + input_fields { + input_name: 'gender' + input_type: INT32 + } + input_fields { + input_name: 'age' + input_type: INT32 + } + input_fields { + input_name: 'job_id' + input_type: INT32 + } + input_fields { + input_name: 'zip_id' + input_type: STRING + } + input_fields { + input_name: 'title' + input_type: STRING + } + input_fields { + input_name: 'genres' + input_type: STRING + } + input_fields { + input_name: 'year' + input_type: INT32 + } + + label_fields: 'label' + batch_size: 1024 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput + separator: '\t' +} + +feature_config: { + features: { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 12000 + } + features: { + input_names: 'movie_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 6000 + } + features: { + input_names: 'gender' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 2 + } + features: { + input_names: 'job_id' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 21 + } + features: { + input_names: 'age' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 7 + } + features: { + input_names: 'genres' + feature_type: TagFeature + separator: '|' + embedding_dim: 16 + hash_bucket_size: 100 + } + features: { + input_names: 'title' + feature_type: SequenceFeature + separator: ' ' + embedding_dim: 16 + hash_bucket_size: 10000 + sequence_combiner: { + text_cnn: { + filter_sizes: [2, 3, 4] + num_filters: [16, 8, 8] + } + } + } + features: { + input_names: 'year' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 36 + } +} +model_config: { + model_name: 'DCN v2' + model_class: 'RankModel' + feature_groups: { + group_name: 'all' + feature_names: 'user_id' + feature_names: 'movie_id' + feature_names: 'job_id' + feature_names: 'age' + feature_names: 'gender' + feature_names: 'year' + 
feature_names: 'genres' + wide_deep: DEEP + } + backbone { + blocks { + name: "deep" + inputs { + feature_group_name: 'all' + } + keras_layer { + class_name: 'MLP' + mlp { + hidden_units: [256, 128, 64] + } + } + } + blocks { + name: "dcn" + inputs { + feature_group_name: 'all' + input_fn: 'lambda x: [x, x]' + } + recurrent { + num_steps: 3 + fixed_input_index: 0 + keras_layer { + class_name: 'Cross' + } + } + } + concat_blocks: ['deep', 'dcn'] + top_mlp { + hidden_units: [64, 32, 16] + } + } + rank_model { + l2_regularization: 1e-4 + } + embedding_regularization: 1e-4 +} +export_config { + multi_placeholder: false +} diff --git a/examples/configs/deepfm_backbone_on_criteo.config b/examples/configs/deepfm_backbone_on_criteo.config new file mode 100644 index 000000000..06c60f966 --- /dev/null +++ b/examples/configs/deepfm_backbone_on_criteo.config @@ -0,0 +1,635 @@ +train_input_path: "examples/data/criteo/criteo_train_data" +eval_input_path: "examples/data/criteo/criteo_test_data" +model_dir: "examples/ckpt/deepfm_backbone_criteo" + +train_config { + log_step_count_steps: 500 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 20000 + sync_replicas: True +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + separator: "\t" + input_fields: { + input_name: "label" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F1" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F2" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F3" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F4" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F5" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F6" + 
input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F7" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F8" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F9" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F10" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F11" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F12" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F13" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "C1" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C2" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C3" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C4" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C5" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C6" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C7" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C8" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C9" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C10" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C11" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C12" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C13" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C14" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C15" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C16" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C17" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C18" + input_type: 
STRING + default_val:"" + } + input_fields: { + input_name: "C19" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C20" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C21" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C22" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C23" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C24" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C25" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C26" + input_type: STRING + default_val:"" + } + label_fields: "label" + + batch_size: 4096 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput +} + +feature_config: { + features: { + input_names: "F1" + embedding_dim: 16 + feature_type: RawFeature + min_val:0.0 + max_val: 5775.0 + } + features: { + input_names: "F2" + embedding_dim: 16 + feature_type: RawFeature + min_val: -3.0 + max_val: 257675.0 + } + features: { + input_names: "F3" + embedding_dim: 16 + feature_type: RawFeature + min_val: 0.0 + max_val: 65535.0 + } + features: { + input_names: "F4" + embedding_dim: 16 + feature_type: RawFeature + min_val: 0.0 + max_val: 969.0 + } + features: { + input_names: "F5" + embedding_dim: 16 + feature_type: RawFeature + min_val: 0.0 + max_val: 23159456.0 + } + features: { + input_names: "F6" + embedding_dim: 16 + feature_type: RawFeature + min_val: 0.0 + max_val: 431037.0 + } + features: { + input_names: "F7" + embedding_dim: 16 + feature_type: RawFeature + min_val: 0.0 + max_val: 56311.0 + } + features: { + input_names: "F8" + embedding_dim: 16 + feature_type: RawFeature + min_val: 0.0 + max_val: 6047.0 + } + features: { + input_names: "F9" + embedding_dim: 16 + feature_type: RawFeature + min_val: 0.0 + max_val: 29019.0 + } + features: { + input_names: "F10" + embedding_dim: 16 + feature_type: RawFeature + min_val: 0.0 + max_val: 46.0 + } + features: { 
+ input_names: "F11" + embedding_dim: 16 + feature_type: RawFeature + min_val: 0.0 + max_val: 231.0 + } + features: { + input_names: "F12" + embedding_dim: 16 + feature_type: RawFeature + min_val: 0.0 + max_val: 4008.0 + } + features: { + input_names: "F13" + embedding_dim: 16 + feature_type: RawFeature + min_val: 0.0 + max_val: 7393.0 + } + features: { + input_names: "C1" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C2" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C3" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C4" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C5" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C6" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C7" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C8" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C9" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C10" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C11" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C12" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C13" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C14" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C15" + hash_bucket_size: 1000000 + feature_type: 
IdFeature + embedding_dim: 16 + } + features: { + input_names: "C16" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C17" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C18" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C19" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C20" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C21" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C22" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C23" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C24" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + }features: { + input_names: "C25" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C26" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } +} +model_config: { + model_name: 'DeepFM' + model_class: 'RankModel' + feature_groups: { + group_name: "deep_features" + feature_names: "F1" + feature_names: "F2" + feature_names: "F3" + feature_names: "F4" + feature_names: "F5" + feature_names: "F6" + feature_names: "F7" + feature_names: "F8" + feature_names: "F9" + feature_names: "F10" + feature_names: "F11" + feature_names: "F12" + feature_names: "F13" + feature_names: "C1" + feature_names: "C2" + feature_names: "C3" + feature_names: "C4" + feature_names: "C5" + feature_names: "C6" + feature_names: "C7" + feature_names: "C8" + feature_names: "C9" + feature_names: "C10" + feature_names: "C11" + feature_names: "C12" + feature_names: "C13" + feature_names: "C14" + 
feature_names: "C15" + feature_names: "C16" + feature_names: "C17" + feature_names: "C18" + feature_names: "C19" + feature_names: "C20" + feature_names: "C21" + feature_names: "C22" + feature_names: "C23" + feature_names: "C24" + feature_names: "C25" + feature_names: "C26" + wide_deep:DEEP + } + feature_groups: { + group_name: "wide_features" + feature_names: "F1" + feature_names: "F2" + feature_names: "F3" + feature_names: "F4" + feature_names: "F5" + feature_names: "F6" + feature_names: "F7" + feature_names: "F8" + feature_names: "F9" + feature_names: "F10" + feature_names: "F11" + feature_names: "F12" + feature_names: "F13" + feature_names: "C1" + feature_names: "C2" + feature_names: "C3" + feature_names: "C4" + feature_names: "C5" + feature_names: "C6" + feature_names: "C7" + feature_names: "C8" + feature_names: "C9" + feature_names: "C10" + feature_names: "C11" + feature_names: "C12" + feature_names: "C13" + feature_names: "C14" + feature_names: "C15" + feature_names: "C16" + feature_names: "C17" + feature_names: "C18" + feature_names: "C19" + feature_names: "C20" + feature_names: "C21" + feature_names: "C22" + feature_names: "C23" + feature_names: "C24" + feature_names: "C25" + feature_names: "C26" + wide_deep:WIDE + } + backbone { + blocks { + name: 'wide_logit' + inputs { + feature_group_name: 'wide_features' + } + lambda { + expression: 'lambda x: tf.reduce_sum(x, axis=1, keepdims=True)' + } + } + blocks { + name: 'deep_features' + inputs { + feature_group_name: 'deep_features' + } + input_layer { + output_2d_tensor_and_feature_list: true + } + } + blocks { + name: 'fm' + inputs { + block_name: 'deep_features' + input_fn: 'lambda x: x[1]' + } + keras_layer { + class_name: 'FM' + st_params { + fields { + key: 'use_variant' + value { bool_value: true } + } + } + } + } + blocks { + name: 'deep' + inputs { + block_name: 'deep_features' + input_fn: 'lambda x: x[0]' + } + keras_layer { + class_name: 'MLP' + mlp { + hidden_units: [256, 128, 64] + } + } + } + 
concat_blocks: ['wide_logit', 'fm', 'deep'] + top_mlp { + hidden_units: [256, 128, 64] + } + } + rank_model { + l2_regularization: 1e-5 + wide_output_dim: 1 + } + embedding_regularization: 1e-5 +} diff --git a/examples/configs/deepfm_backbone_on_criteo_with_autodis.config b/examples/configs/deepfm_backbone_on_criteo_with_autodis.config new file mode 100644 index 000000000..9d1856cae --- /dev/null +++ b/examples/configs/deepfm_backbone_on_criteo_with_autodis.config @@ -0,0 +1,751 @@ +train_input_path: "examples/data/criteo/criteo_train_data" +eval_input_path: "examples/data/criteo/criteo_test_data" +model_dir: "examples/ckpt/deepfm_autodis_criteo" + +train_config { + log_step_count_steps: 500 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 20000 + sync_replicas: True +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + separator: "\t" + input_fields: { + input_name: "label" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F1" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F2" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F3" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F4" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F5" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F6" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F7" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F8" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F9" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F10" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F11" + 
input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F12" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F13" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "C1" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C2" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C3" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C4" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C5" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C6" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C7" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C8" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C9" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C10" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C11" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C12" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C13" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C14" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C15" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C16" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C17" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C18" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C19" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C20" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C21" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C22" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C23" + 
input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C24" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C25" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C26" + input_type: STRING + default_val:"" + } + label_fields: "label" + + batch_size: 4096 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput +} + +feature_config: { + features: { + input_names: "F1" + feature_type: RawFeature + min_val:0.0 + max_val: 5775.0 + } + features: { + input_names: "F2" + feature_type: RawFeature + min_val: -3.0 + max_val: 257675.0 + } + features: { + input_names: "F3" + feature_type: RawFeature + min_val: 0.0 + max_val: 65535.0 + } + features: { + input_names: "F4" + feature_type: RawFeature + min_val: 0.0 + max_val: 969.0 + } + features: { + input_names: "F5" + feature_type: RawFeature + min_val: 0.0 + max_val: 23159456.0 + } + features: { + input_names: "F6" + feature_type: RawFeature + min_val: 0.0 + max_val: 431037.0 + } + features: { + input_names: "F7" + feature_type: RawFeature + min_val: 0.0 + max_val: 56311.0 + } + features: { + input_names: "F8" + feature_type: RawFeature + min_val: 0.0 + max_val: 6047.0 + } + features: { + input_names: "F9" + feature_type: RawFeature + min_val: 0.0 + max_val: 29019.0 + } + features: { + input_names: "F10" + feature_type: RawFeature + min_val: 0.0 + max_val: 46.0 + } + features: { + input_names: "F11" + feature_type: RawFeature + min_val: 0.0 + max_val: 231.0 + } + features: { + input_names: "F12" + feature_type: RawFeature + min_val: 0.0 + max_val: 4008.0 + } + features: { + input_names: "F13" + feature_type: RawFeature + min_val: 0.0 + max_val: 7393.0 + } + features: { + input_names: "C1" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C2" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C3" + hash_bucket_size: 1000000 + feature_type: 
IdFeature + embedding_dim: 16 + } + features: { + input_names: "C4" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C5" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C6" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C7" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C8" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C9" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C10" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C11" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C12" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C13" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C14" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C15" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C16" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C17" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C18" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C19" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C20" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C21" + 
hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C22" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C23" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C24" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + }features: { + input_names: "C25" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C26" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + feature_name: "D1" + input_names: "F1" + embedding_dim:16 + feature_type: RawFeature + min_val:0.0 + max_val: 5775.0 + } + features: { + feature_name: "D2" + input_names: "F2" + embedding_dim:16 + feature_type: RawFeature + min_val: -3.0 + max_val: 257675.0 + } + features: { + feature_name: "D3" + input_names: "F3" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 65535.0 + } + features: { + feature_name: "D4" + input_names: "F4" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 969.0 + } + features: { + feature_name: "D5" + input_names: "F5" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 23159456.0 + } + features: { + feature_name: "D6" + input_names: "F6" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 431037.0 + } + features: { + feature_name: "D7" + input_names: "F7" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 56311.0 + } + features: { + feature_name: "D8" + input_names: "F8" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 6047.0 + } + features: { + feature_name: "D9" + input_names: "F9" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 29019.0 + } + features: { + feature_name: "D10" + input_names: "F10" + embedding_dim:16 + feature_type: RawFeature + 
min_val: 0.0 + max_val: 46.0 + } + features: { + feature_name: "D11" + input_names: "F11" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 231.0 + } + features: { + feature_name: "D12" + input_names: "F12" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 4008.0 + } + features: { + feature_name: "D13" + input_names: "F13" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 7393.0 + } +} +model_config: { + model_name: 'DeepFM with AutoDis' + model_class: 'RankModel' + feature_groups: { + group_name: "numerical_features" + feature_names: "F1" + feature_names: "F2" + feature_names: "F3" + feature_names: "F4" + feature_names: "F5" + feature_names: "F6" + feature_names: "F7" + feature_names: "F8" + feature_names: "F9" + feature_names: "F10" + feature_names: "F11" + feature_names: "F12" + feature_names: "F13" + wide_deep:DEEP + } + feature_groups: { + group_name: "categorical_features" + feature_names: "C1" + feature_names: "C2" + feature_names: "C3" + feature_names: "C4" + feature_names: "C5" + feature_names: "C6" + feature_names: "C7" + feature_names: "C8" + feature_names: "C9" + feature_names: "C10" + feature_names: "C11" + feature_names: "C12" + feature_names: "C13" + feature_names: "C14" + feature_names: "C15" + feature_names: "C16" + feature_names: "C17" + feature_names: "C18" + feature_names: "C19" + feature_names: "C20" + feature_names: "C21" + feature_names: "C22" + feature_names: "C23" + feature_names: "C24" + feature_names: "C25" + feature_names: "C26" + wide_deep:DEEP + } + feature_groups: { + group_name: "wide_features" + feature_names: "D1" + feature_names: "D2" + feature_names: "D3" + feature_names: "D4" + feature_names: "D5" + feature_names: "D6" + feature_names: "D7" + feature_names: "D8" + feature_names: "D9" + feature_names: "D10" + feature_names: "D11" + feature_names: "D12" + feature_names: "D13" + feature_names: "C1" + feature_names: "C2" + feature_names: "C3" + feature_names: "C4" + 
feature_names: "C5" + feature_names: "C6" + feature_names: "C7" + feature_names: "C8" + feature_names: "C9" + feature_names: "C10" + feature_names: "C11" + feature_names: "C12" + feature_names: "C13" + feature_names: "C14" + feature_names: "C15" + feature_names: "C16" + feature_names: "C17" + feature_names: "C18" + feature_names: "C19" + feature_names: "C20" + feature_names: "C21" + feature_names: "C22" + feature_names: "C23" + feature_names: "C24" + feature_names: "C25" + feature_names: "C26" + wide_deep:WIDE + } + backbone { + blocks { + name: 'wide_logit' + inputs { + feature_group_name: 'wide_features' + } + lambda { + expression: 'lambda x: tf.reduce_sum(x, axis=1, keepdims=True)' + } + } + blocks { + name: 'num_emb' + inputs { + feature_group_name: 'numerical_features' + } + keras_layer { + class_name: 'AutoDisEmbedding' + auto_dis_embedding { + embedding_dim: 16 + num_bins: 20 + temperature: 0.815 + output_tensor_list: true + } + } + } + blocks { + name: 'categorical_features' + inputs { + feature_group_name: 'categorical_features' + } + input_layer { + output_2d_tensor_and_feature_list: true + } + } + blocks { + name: 'fm' + inputs { + block_name: 'categorical_features' + input_fn: 'lambda x: x[1]' + } + inputs { + block_name: 'num_emb' + input_fn: 'lambda x: x[1]' + } + keras_layer { + class_name: 'FM' + fm { + use_variant: true + } + } + } + blocks { + name: 'deep' + inputs { + block_name: 'categorical_features' + input_fn: 'lambda x: x[0]' + } + inputs { + block_name: 'num_emb' + input_fn: 'lambda x: x[0]' + } + keras_layer { + class_name: 'MLP' + mlp { + hidden_units: [256, 128, 64] + } + } + } + # no wide_logit may have better performance + concat_blocks: ['wide_logit', 'fm', 'deep'] + top_mlp { + hidden_units: [256, 128, 64] + } + } + rank_model { + l2_regularization: 1e-5 + wide_output_dim: 1 + } + embedding_regularization: 1e-5 +} diff --git a/examples/configs/deepfm_backbone_on_criteo_with_periodic.config 
b/examples/configs/deepfm_backbone_on_criteo_with_periodic.config new file mode 100644 index 000000000..3ce65c8bf --- /dev/null +++ b/examples/configs/deepfm_backbone_on_criteo_with_periodic.config @@ -0,0 +1,749 @@ +train_input_path: "examples/data/criteo/criteo_train_data" +eval_input_path: "examples/data/criteo/criteo_test_data" +model_dir: "examples/ckpt/deepfm_periodic_criteo" + +train_config { + log_step_count_steps: 500 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 20000 + sync_replicas: True +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + separator: "\t" + input_fields: { + input_name: "label" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F1" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F2" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F3" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F4" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F5" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F6" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F7" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F8" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F9" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F10" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F11" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F12" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F13" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "C1" + input_type: STRING + 
default_val:"" + } + input_fields: { + input_name: "C2" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C3" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C4" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C5" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C6" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C7" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C8" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C9" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C10" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C11" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C12" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C13" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C14" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C15" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C16" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C17" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C18" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C19" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C20" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C21" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C22" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C23" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C24" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C25" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C26" + input_type: STRING + 
default_val:"" + } + label_fields: "label" + + batch_size: 4096 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput +} + +feature_config: { + features: { + input_names: "F1" + feature_type: RawFeature + min_val:0.0 + max_val: 5775.0 + } + features: { + input_names: "F2" + feature_type: RawFeature + min_val: -3.0 + max_val: 257675.0 + } + features: { + input_names: "F3" + feature_type: RawFeature + min_val: 0.0 + max_val: 65535.0 + } + features: { + input_names: "F4" + feature_type: RawFeature + min_val: 0.0 + max_val: 969.0 + } + features: { + input_names: "F5" + feature_type: RawFeature + min_val: 0.0 + max_val: 23159456.0 + } + features: { + input_names: "F6" + feature_type: RawFeature + min_val: 0.0 + max_val: 431037.0 + } + features: { + input_names: "F7" + feature_type: RawFeature + min_val: 0.0 + max_val: 56311.0 + } + features: { + input_names: "F8" + feature_type: RawFeature + min_val: 0.0 + max_val: 6047.0 + } + features: { + input_names: "F9" + feature_type: RawFeature + min_val: 0.0 + max_val: 29019.0 + } + features: { + input_names: "F10" + feature_type: RawFeature + min_val: 0.0 + max_val: 46.0 + } + features: { + input_names: "F11" + feature_type: RawFeature + min_val: 0.0 + max_val: 231.0 + } + features: { + input_names: "F12" + feature_type: RawFeature + min_val: 0.0 + max_val: 4008.0 + } + features: { + input_names: "F13" + feature_type: RawFeature + min_val: 0.0 + max_val: 7393.0 + } + features: { + input_names: "C1" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C2" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C3" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C4" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C5" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + 
features: { + input_names: "C6" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C7" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C8" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C9" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C10" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C11" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C12" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C13" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C14" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C15" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C16" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C17" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C18" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C19" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C20" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C21" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C22" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C23" + hash_bucket_size: 1000000 + feature_type: 
IdFeature + embedding_dim: 16 + } + features: { + input_names: "C24" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + }features: { + input_names: "C25" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C26" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + feature_name: "D1" + input_names: "F1" + embedding_dim:16 + feature_type: RawFeature + min_val:0.0 + max_val: 5775.0 + } + features: { + feature_name: "D2" + input_names: "F2" + embedding_dim:16 + feature_type: RawFeature + min_val: -3.0 + max_val: 257675.0 + } + features: { + feature_name: "D3" + input_names: "F3" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 65535.0 + } + features: { + feature_name: "D4" + input_names: "F4" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 969.0 + } + features: { + feature_name: "D5" + input_names: "F5" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 23159456.0 + } + features: { + feature_name: "D6" + input_names: "F6" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 431037.0 + } + features: { + feature_name: "D7" + input_names: "F7" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 56311.0 + } + features: { + feature_name: "D8" + input_names: "F8" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 6047.0 + } + features: { + feature_name: "D9" + input_names: "F9" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 29019.0 + } + features: { + feature_name: "D10" + input_names: "F10" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 46.0 + } + features: { + feature_name: "D11" + input_names: "F11" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 231.0 + } + features: { + feature_name: "D12" + input_names: "F12" + embedding_dim:16 + feature_type: 
RawFeature + min_val: 0.0 + max_val: 4008.0 + } + features: { + feature_name: "D13" + input_names: "F13" + embedding_dim:16 + feature_type: RawFeature + min_val: 0.0 + max_val: 7393.0 + } +} +model_config: { + model_name: 'DeepFM with Periodic' + model_class: 'RankModel' + feature_groups: { + group_name: "numerical_features" + feature_names: "F1" + feature_names: "F2" + feature_names: "F3" + feature_names: "F4" + feature_names: "F5" + feature_names: "F6" + feature_names: "F7" + feature_names: "F8" + feature_names: "F9" + feature_names: "F10" + feature_names: "F11" + feature_names: "F12" + feature_names: "F13" + wide_deep:DEEP + } + feature_groups: { + group_name: "categorical_features" + feature_names: "C1" + feature_names: "C2" + feature_names: "C3" + feature_names: "C4" + feature_names: "C5" + feature_names: "C6" + feature_names: "C7" + feature_names: "C8" + feature_names: "C9" + feature_names: "C10" + feature_names: "C11" + feature_names: "C12" + feature_names: "C13" + feature_names: "C14" + feature_names: "C15" + feature_names: "C16" + feature_names: "C17" + feature_names: "C18" + feature_names: "C19" + feature_names: "C20" + feature_names: "C21" + feature_names: "C22" + feature_names: "C23" + feature_names: "C24" + feature_names: "C25" + feature_names: "C26" + wide_deep:DEEP + } + feature_groups: { + group_name: "wide_features" + feature_names: "D1" + feature_names: "D2" + feature_names: "D3" + feature_names: "D4" + feature_names: "D5" + feature_names: "D6" + feature_names: "D7" + feature_names: "D8" + feature_names: "D9" + feature_names: "D10" + feature_names: "D11" + feature_names: "D12" + feature_names: "D13" + feature_names: "C1" + feature_names: "C2" + feature_names: "C3" + feature_names: "C4" + feature_names: "C5" + feature_names: "C6" + feature_names: "C7" + feature_names: "C8" + feature_names: "C9" + feature_names: "C10" + feature_names: "C11" + feature_names: "C12" + feature_names: "C13" + feature_names: "C14" + feature_names: "C15" + feature_names: 
"C16" + feature_names: "C17" + feature_names: "C18" + feature_names: "C19" + feature_names: "C20" + feature_names: "C21" + feature_names: "C22" + feature_names: "C23" + feature_names: "C24" + feature_names: "C25" + feature_names: "C26" + wide_deep:WIDE + } + backbone { + blocks { + name: 'wide_logit' + inputs { + feature_group_name: 'wide_features' + } + lambda { + expression: 'lambda x: tf.reduce_sum(x, axis=1, keepdims=True)' + } + } + blocks { + name: 'num_emb' + inputs { + feature_group_name: 'numerical_features' + } + keras_layer { + class_name: 'PeriodicEmbedding' + periodic_embedding { + embedding_dim: 16 + sigma: 0.005 + output_tensor_list: true + } + } + } + blocks { + name: 'categorical_features' + inputs { + feature_group_name: 'categorical_features' + } + input_layer { + output_2d_tensor_and_feature_list: true + } + } + blocks { + name: 'fm' + inputs { + block_name: 'categorical_features' + input_fn: 'lambda x: x[1]' + } + inputs { + block_name: 'num_emb' + input_fn: 'lambda x: x[1]' + } + keras_layer { + class_name: 'FM' + fm { + use_variant: true + } + } + } + blocks { + name: 'deep' + inputs { + block_name: 'categorical_features' + input_fn: 'lambda x: x[0]' + } + inputs { + block_name: 'num_emb' + input_fn: 'lambda x: x[0]' + } + keras_layer { + class_name: 'MLP' + mlp { + hidden_units: [256, 128, 64] + } + } + } + concat_blocks: ['wide_logit', 'fm', 'deep'] + top_mlp { + hidden_units: [256, 128, 64] + } + } + rank_model { + l2_regularization: 1e-5 + wide_output_dim: 1 + } + embedding_regularization: 1e-5 +} diff --git a/examples/configs/deepfm_backbone_on_movielens.config b/examples/configs/deepfm_backbone_on_movielens.config new file mode 100644 index 000000000..36ef7ace3 --- /dev/null +++ b/examples/configs/deepfm_backbone_on_movielens.config @@ -0,0 +1,246 @@ +train_input_path: "examples/data/movielens_1m/movies_train_data" +eval_input_path: "examples/data/movielens_1m/movies_test_data" +model_dir: "examples/ckpt/deepfm_backbone_movieslen" + 
+train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 2000 + sync_replicas: True +} + +eval_config { + metrics_set: { + auc {} + } + metrics_set: { + gauc { + uid_field: 'user_id' + } + } + metrics_set: { + max_f1 {} + } +} + +data_config { + input_fields { + input_name:'label' + input_type: INT32 + } + input_fields { + input_name:'user_id' + input_type: INT32 + } + input_fields { + input_name: 'movie_id' + input_type: INT32 + } + input_fields { + input_name:'rating' + input_type: INT32 + } + input_fields { + input_name: 'gender' + input_type: INT32 + } + input_fields { + input_name: 'age' + input_type: INT32 + } + input_fields { + input_name: 'job_id' + input_type: INT32 + } + input_fields { + input_name: 'zip_id' + input_type: STRING + } + input_fields { + input_name: 'title' + input_type: STRING + } + input_fields { + input_name: 'genres' + input_type: STRING + } + input_fields { + input_name: 'year' + input_type: INT32 + } + + label_fields: 'label' + batch_size: 1024 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput + separator: '\t' +} + +feature_config: { + features: { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 12000 + } + features: { + input_names: 'movie_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 6000 + } + features: { + input_names: 'gender' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 2 + } + features: { + input_names: 'job_id' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 21 + } + features: { + input_names: 'age' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 7 + } + features: { + input_names: 'genres' + feature_type: TagFeature + separator: '|' + embedding_dim: 16 + 
hash_bucket_size: 100 + } + features: { + input_names: 'title' + feature_type: SequenceFeature + separator: ' ' + embedding_dim: 16 + hash_bucket_size: 10000 + sequence_combiner: { + text_cnn: { + filter_sizes: [2, 3, 4] + num_filters: [8, 4, 4] + } + } + } + features: { + input_names: 'year' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 36 + } +} +model_config: { + model_name: 'DeepFM' + model_class: 'RankModel' + feature_groups: { + group_name: 'wide' + feature_names: 'user_id' + feature_names: 'movie_id' + feature_names: 'job_id' + feature_names: 'age' + feature_names: 'gender' + feature_names: 'year' + feature_names: 'genres' + wide_deep: WIDE + } + feature_groups: { + group_name: 'features' + feature_names: 'user_id' + feature_names: 'movie_id' + feature_names: 'job_id' + feature_names: 'age' + feature_names: 'gender' + feature_names: 'year' + feature_names: 'genres' + feature_names: 'title' + wide_deep: DEEP + } + backbone { + blocks { + name: 'wide_logit' + inputs { + feature_group_name: 'wide' + } + lambda { + expression: 'lambda x: tf.reduce_sum(x, axis=1, keepdims=True)' + } + } + blocks { + name: 'features' + inputs { + feature_group_name: 'features' + } + input_layer { + output_2d_tensor_and_feature_list: true + } + } + blocks { + name: 'fm' + inputs { + block_name: 'features' + input_fn: 'lambda x: x[1]' + } + keras_layer { + class_name: 'FM' + } + } + blocks { + name: 'deep' + inputs { + block_name: 'features' + input_fn: 'lambda x: x[0]' + } + keras_layer { + class_name: 'MLP' + mlp { + hidden_units: [256, 128, 64, 1] + use_final_bn: false + final_activation: 'linear' + } + } + } + blocks { + name: 'add' + inputs { + block_name: 'wide_logit' + } + inputs { + block_name: 'fm' + } + inputs { + block_name: 'deep' + } + merge_inputs_into_list: true + keras_layer { + class_name: 'Add' + } + } + concat_blocks: 'add' + } + rank_model { + l2_regularization: 1e-4 + wide_output_dim: 1 + } + embedding_regularization: 1e-4 +} +export_config { + 
multi_placeholder: false +} diff --git a/examples/configs/deepfm_on_criteo.config b/examples/configs/deepfm_on_criteo.config index c482cf246..fc8537f0d 100644 --- a/examples/configs/deepfm_on_criteo.config +++ b/examples/configs/deepfm_on_criteo.config @@ -241,91 +241,91 @@ data_config { feature_config: { features: { input_names: "F1" - embedding_dim:10 + embedding_dim:16 feature_type: RawFeature min_val:0.0 max_val: 5775.0 } features: { input_names: "F2" - embedding_dim:10 + embedding_dim:16 feature_type: RawFeature min_val: -3.0 max_val: 257675.0 } features: { input_names: "F3" - embedding_dim:10 + embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 65535.0 } features: { input_names: "F4" - embedding_dim:10 + embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 969.0 } features: { input_names: "F5" - embedding_dim:10 + embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 23159456.0 } features: { input_names: "F6" - embedding_dim:10 + embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 431037.0 } features: { input_names: "F7" - embedding_dim:10 + embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 56311.0 } features: { input_names: "F8" - embedding_dim:10 + embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 6047.0 } features: { input_names: "F9" - embedding_dim:10 + embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 29019.0 } features: { input_names: "F10" - embedding_dim:10 + embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 46.0 } features: { input_names: "F11" - embedding_dim:10 + embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 231.0 } features: { input_names: "F12" - embedding_dim:10 + embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 4008.0 } features: { input_names: "F13" - embedding_dim:10 + embedding_dim:16 feature_type: RawFeature min_val: 0.0 max_val: 7393.0 diff --git a/examples/configs/deepfm_on_movielens.config 
b/examples/configs/deepfm_on_movielens.config index cab092c20..0468ae12f 100644 --- a/examples/configs/deepfm_on_movielens.config +++ b/examples/configs/deepfm_on_movielens.config @@ -137,7 +137,7 @@ feature_config: { sequence_combiner: { text_cnn: { filter_sizes: [2, 3, 4] - num_filters: [16, 8, 8] + num_filters: [8, 4, 4] } } } diff --git a/examples/configs/dlrm_backbone_on_criteo.config b/examples/configs/dlrm_backbone_on_criteo.config new file mode 100644 index 000000000..6dc5dd41e --- /dev/null +++ b/examples/configs/dlrm_backbone_on_criteo.config @@ -0,0 +1,578 @@ +# align with raw dlrm model +train_input_path: "examples/data/criteo/criteo_train_data" +eval_input_path: "examples/data/criteo/criteo_test_data" +model_dir: "examples/ckpt/dlrm_backbone_criteo" + +train_config { + log_step_count_steps: 500 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 20000 + sync_replicas: True +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + separator: "\t" + input_fields: { + input_name: "label" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F1" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F2" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F3" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F4" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F5" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F6" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F7" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F8" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F9" + input_type: FLOAT + default_val:"0" + } + 
input_fields: { + input_name: "F10" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F11" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F12" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F13" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "C1" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C2" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C3" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C4" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C5" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C6" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C7" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C8" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C9" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C10" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C11" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C12" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C13" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C14" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C15" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C16" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C17" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C18" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C19" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C20" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C21" + input_type: STRING + default_val:"" + } + 
input_fields: { + input_name: "C22" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C23" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C24" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C25" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C26" + input_type: STRING + default_val:"" + } + label_fields: "label" + + batch_size: 4096 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput +} + +feature_config: { + features: { + input_names: "F1" + feature_type: RawFeature + min_val:0.0 + max_val: 5775.0 + } + features: { + input_names: "F2" + feature_type: RawFeature + min_val: -3.0 + max_val: 257675.0 + } + features: { + input_names: "F3" + feature_type: RawFeature + min_val: 0.0 + max_val: 65535.0 + } + features: { + input_names: "F4" + feature_type: RawFeature + min_val: 0.0 + max_val: 969.0 + } + features: { + input_names: "F5" + feature_type: RawFeature + min_val: 0.0 + max_val: 23159456.0 + } + features: { + input_names: "F6" + feature_type: RawFeature + min_val: 0.0 + max_val: 431037.0 + } + features: { + input_names: "F7" + feature_type: RawFeature + min_val: 0.0 + max_val: 56311.0 + } + features: { + input_names: "F8" + feature_type: RawFeature + min_val: 0.0 + max_val: 6047.0 + } + features: { + input_names: "F9" + feature_type: RawFeature + min_val: 0.0 + max_val: 29019.0 + } + features: { + input_names: "F10" + feature_type: RawFeature + min_val: 0.0 + max_val: 46.0 + } + features: { + input_names: "F11" + feature_type: RawFeature + min_val: 0.0 + max_val: 231.0 + } + features: { + input_names: "F12" + feature_type: RawFeature + min_val: 0.0 + max_val: 4008.0 + } + features: { + input_names: "F13" + feature_type: RawFeature + min_val: 0.0 + max_val: 7393.0 + } + features: { + input_names: "C1" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C2" + hash_bucket_size: 1000000 + 
feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C3" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C4" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C5" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C6" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C7" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C8" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C9" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C10" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C11" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C12" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C13" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C14" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C15" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C16" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C17" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C18" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C19" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C20" + 
hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C21" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C22" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C23" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C24" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + }features: { + input_names: "C25" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C26" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } +} +model_config: { + model_name: 'DLRM' + model_class: 'RankModel' + feature_groups: { + group_name: "dense" + feature_names: "F1" + feature_names: "F2" + feature_names: "F3" + feature_names: "F4" + feature_names: "F5" + feature_names: "F6" + feature_names: "F7" + feature_names: "F8" + feature_names: "F9" + feature_names: "F10" + feature_names: "F11" + feature_names: "F12" + feature_names: "F13" + wide_deep:DEEP + } + feature_groups: { + group_name: "sparse" + feature_names: "C1" + feature_names: "C2" + feature_names: "C3" + feature_names: "C4" + feature_names: "C5" + feature_names: "C6" + feature_names: "C7" + feature_names: "C8" + feature_names: "C9" + feature_names: "C10" + feature_names: "C11" + feature_names: "C12" + feature_names: "C13" + feature_names: "C14" + feature_names: "C15" + feature_names: "C16" + feature_names: "C17" + feature_names: "C18" + feature_names: "C19" + feature_names: "C20" + feature_names: "C21" + feature_names: "C22" + feature_names: "C23" + feature_names: "C24" + feature_names: "C25" + feature_names: "C26" + wide_deep:DEEP + } + backbone { + blocks { + name: 'bottom_mlp' + inputs { + feature_group_name: 'dense' + } + keras_layer { + class_name: 'MLP' + mlp { + hidden_units: [64, 32, 16] + } + } + } 
+ blocks { + name: 'sparse' + inputs { + feature_group_name: 'sparse' + } + input_layer { + output_2d_tensor_and_feature_list: true + } + } + blocks { + name: 'dot' + inputs { + block_name: 'bottom_mlp' + input_fn: 'lambda x: [x]' + } + inputs { + block_name: 'sparse' + input_fn: 'lambda x: x[1]' + } + keras_layer { + class_name: 'DotInteraction' + } + } + blocks { + name: 'sparse_2d' + inputs { + block_name: 'sparse' + input_fn: 'lambda x: x[0]' + } + } + concat_blocks: ['sparse_2d', 'dot'] + top_mlp { + hidden_units: [256, 128, 64] + } + } + rank_model { + l2_regularization: 1e-5 + } + embedding_regularization: 1e-5 +} diff --git a/examples/configs/dlrm_on_criteo.config b/examples/configs/dlrm_on_criteo.config new file mode 100644 index 000000000..e6c45d574 --- /dev/null +++ b/examples/configs/dlrm_on_criteo.config @@ -0,0 +1,534 @@ +train_input_path: "examples/data/criteo/criteo_train_data" +eval_input_path: "examples/data/criteo/criteo_test_data" +model_dir: "examples/ckpt/dlrm_criteo_ckpt" + +train_config { + log_step_count_steps: 500 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 20000 + sync_replicas: True +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + separator: "\t" + input_fields: { + input_name: "label" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F1" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F2" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F3" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F4" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F5" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F6" + input_type: FLOAT + default_val:"0" + } + 
input_fields: { + input_name: "F7" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F8" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F9" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F10" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F11" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F12" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F13" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "C1" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C2" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C3" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C4" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C5" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C6" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C7" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C8" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C9" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C10" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C11" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C12" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C13" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C14" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C15" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C16" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C17" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C18" + input_type: STRING + default_val:"" + } + 
input_fields: { + input_name: "C19" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C20" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C21" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C22" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C23" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C24" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C25" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C26" + input_type: STRING + default_val:"" + } + label_fields: "label" + + batch_size: 4096 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput +} + +feature_config: { + features: { + input_names: "F1" + feature_type: RawFeature + min_val:0.0 + max_val: 5775.0 + } + features: { + input_names: "F2" + feature_type: RawFeature + min_val: -3.0 + max_val: 257675.0 + } + features: { + input_names: "F3" + feature_type: RawFeature + min_val: 0.0 + max_val: 65535.0 + } + features: { + input_names: "F4" + feature_type: RawFeature + min_val: 0.0 + max_val: 969.0 + } + features: { + input_names: "F5" + feature_type: RawFeature + min_val: 0.0 + max_val: 23159456.0 + } + features: { + input_names: "F6" + feature_type: RawFeature + min_val: 0.0 + max_val: 431037.0 + } + features: { + input_names: "F7" + feature_type: RawFeature + min_val: 0.0 + max_val: 56311.0 + } + features: { + input_names: "F8" + feature_type: RawFeature + min_val: 0.0 + max_val: 6047.0 + } + features: { + input_names: "F9" + feature_type: RawFeature + min_val: 0.0 + max_val: 29019.0 + } + features: { + input_names: "F10" + feature_type: RawFeature + min_val: 0.0 + max_val: 46.0 + } + features: { + input_names: "F11" + feature_type: RawFeature + min_val: 0.0 + max_val: 231.0 + } + features: { + input_names: "F12" + feature_type: RawFeature + min_val: 0.0 + max_val: 4008.0 + } + features: { + input_names: "F13" + 
feature_type: RawFeature + min_val: 0.0 + max_val: 7393.0 + } + features: { + input_names: "C1" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C2" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C3" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C4" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C5" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C6" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C7" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C8" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C9" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C10" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C11" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C12" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C13" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C14" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C15" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C16" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C17" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + 
input_names: "C18" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C19" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C20" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C21" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C22" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C23" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C24" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + }features: { + input_names: "C25" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C26" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } +} +model_config: { + model_class: 'DLRM' + feature_groups: { + group_name: "dense" + feature_names: "F1" + feature_names: "F2" + feature_names: "F3" + feature_names: "F4" + feature_names: "F5" + feature_names: "F6" + feature_names: "F7" + feature_names: "F8" + feature_names: "F9" + feature_names: "F10" + feature_names: "F11" + feature_names: "F12" + feature_names: "F13" + wide_deep:DEEP + } + feature_groups: { + group_name: "sparse" + feature_names: "C1" + feature_names: "C2" + feature_names: "C3" + feature_names: "C4" + feature_names: "C5" + feature_names: "C6" + feature_names: "C7" + feature_names: "C8" + feature_names: "C9" + feature_names: "C10" + feature_names: "C11" + feature_names: "C12" + feature_names: "C13" + feature_names: "C14" + feature_names: "C15" + feature_names: "C16" + feature_names: "C17" + feature_names: "C18" + feature_names: "C19" + feature_names: "C20" + feature_names: "C21" + feature_names: "C22" + feature_names: "C23" + feature_names: "C24" + feature_names: 
"C25" + feature_names: "C26" + wide_deep:DEEP + } + dlrm { + bot_dnn { + hidden_units: [64, 32, 16] + } + top_dnn { + hidden_units: [256, 128, 64] + } + l2_regularization: 1e-5 + } + embedding_regularization: 1e-5 +} diff --git a/examples/configs/dlrm_on_criteo_with_autodis.config b/examples/configs/dlrm_on_criteo_with_autodis.config new file mode 100644 index 000000000..c6f522f95 --- /dev/null +++ b/examples/configs/dlrm_on_criteo_with_autodis.config @@ -0,0 +1,587 @@ +train_input_path: "examples/data/criteo/criteo_train_data" +eval_input_path: "examples/data/criteo/criteo_test_data" +model_dir: "examples/ckpt/dlrm_autodis_criteo" + +train_config { + log_step_count_steps: 500 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 20000 + sync_replicas: True +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + separator: "\t" + input_fields: { + input_name: "label" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F1" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F2" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F3" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F4" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F5" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F6" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F7" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F8" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F9" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F10" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F11" + input_type: FLOAT 
+ default_val:"0" + } + input_fields: { + input_name: "F12" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F13" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "C1" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C2" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C3" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C4" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C5" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C6" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C7" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C8" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C9" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C10" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C11" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C12" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C13" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C14" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C15" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C16" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C17" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C18" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C19" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C20" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C21" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C22" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C23" + input_type: STRING + 
default_val:"" + } + input_fields: { + input_name: "C24" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C25" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C26" + input_type: STRING + default_val:"" + } + label_fields: "label" + + batch_size: 4096 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput +} + +feature_config: { + features: { + input_names: "F1" + feature_type: RawFeature + min_val:0.0 + max_val: 5775.0 + } + features: { + input_names: "F2" + feature_type: RawFeature + min_val: -3.0 + max_val: 257675.0 + } + features: { + input_names: "F3" + feature_type: RawFeature + min_val: 0.0 + max_val: 65535.0 + } + features: { + input_names: "F4" + feature_type: RawFeature + min_val: 0.0 + max_val: 969.0 + } + features: { + input_names: "F5" + feature_type: RawFeature + min_val: 0.0 + max_val: 23159456.0 + } + features: { + input_names: "F6" + feature_type: RawFeature + min_val: 0.0 + max_val: 431037.0 + } + features: { + input_names: "F7" + feature_type: RawFeature + min_val: 0.0 + max_val: 56311.0 + } + features: { + input_names: "F8" + feature_type: RawFeature + min_val: 0.0 + max_val: 6047.0 + } + features: { + input_names: "F9" + feature_type: RawFeature + min_val: 0.0 + max_val: 29019.0 + } + features: { + input_names: "F10" + feature_type: RawFeature + min_val: 0.0 + max_val: 46.0 + } + features: { + input_names: "F11" + feature_type: RawFeature + min_val: 0.0 + max_val: 231.0 + } + features: { + input_names: "F12" + feature_type: RawFeature + min_val: 0.0 + max_val: 4008.0 + } + features: { + input_names: "F13" + feature_type: RawFeature + min_val: 0.0 + max_val: 7393.0 + } + features: { + input_names: "C1" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C2" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C3" + hash_bucket_size: 1000000 + feature_type: IdFeature + 
embedding_dim: 16 + } + features: { + input_names: "C4" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C5" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C6" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C7" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C8" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C9" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C10" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C11" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C12" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C13" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C14" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C15" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C16" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C17" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C18" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C19" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C20" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C21" + hash_bucket_size: 1000000 
+ feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C22" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C23" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C24" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + }features: { + input_names: "C25" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C26" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } +} +model_config: { + model_name: 'DLRM with autodis' + model_class: 'RankModel' + feature_groups: { + group_name: "dense" + feature_names: "F1" + feature_names: "F2" + feature_names: "F3" + feature_names: "F4" + feature_names: "F5" + feature_names: "F6" + feature_names: "F7" + feature_names: "F8" + feature_names: "F9" + feature_names: "F10" + feature_names: "F11" + feature_names: "F12" + feature_names: "F13" + wide_deep:DEEP + } + feature_groups: { + group_name: "sparse" + feature_names: "C1" + feature_names: "C2" + feature_names: "C3" + feature_names: "C4" + feature_names: "C5" + feature_names: "C6" + feature_names: "C7" + feature_names: "C8" + feature_names: "C9" + feature_names: "C10" + feature_names: "C11" + feature_names: "C12" + feature_names: "C13" + feature_names: "C14" + feature_names: "C15" + feature_names: "C16" + feature_names: "C17" + feature_names: "C18" + feature_names: "C19" + feature_names: "C20" + feature_names: "C21" + feature_names: "C22" + feature_names: "C23" + feature_names: "C24" + feature_names: "C25" + feature_names: "C26" + wide_deep:DEEP + } + backbone { + blocks { + name: 'num_emb' + inputs { + feature_group_name: 'dense' + } + keras_layer { + class_name: 'AutoDisEmbedding' + auto_dis_embedding { + embedding_dim: 16 + num_bins: 40 + temperature: 0.815 + output_tensor_list: true + } + } + } + blocks { + name: 'sparse' + inputs { + 
feature_group_name: 'sparse' + } + input_layer { + output_2d_tensor_and_feature_list: true + } + } + blocks { + name: 'dot' + inputs { + block_name: 'num_emb' + input_fn: 'lambda x: x[1]' + } + inputs { + block_name: 'sparse' + input_fn: 'lambda x: x[1]' + } + keras_layer { + class_name: 'DotInteraction' + } + } + blocks { + name: 'sparse_2d' + inputs { + block_name: 'sparse' + input_fn: 'lambda x: x[0]' + } + } + blocks { + name: 'num_emb_2d' + inputs { + block_name: 'num_emb' + input_fn: 'lambda x: x[0]' + } + } + concat_blocks: ['num_emb_2d', 'dot', 'sparse_2d'] + top_mlp { + hidden_units: [256, 128, 64] + } + } + rank_model { + l2_regularization: 1e-5 + } + embedding_regularization: 1e-5 +} diff --git a/examples/configs/dlrm_on_criteo_with_periodic.config b/examples/configs/dlrm_on_criteo_with_periodic.config new file mode 100644 index 000000000..c42e8252b --- /dev/null +++ b/examples/configs/dlrm_on_criteo_with_periodic.config @@ -0,0 +1,595 @@ +train_input_path: "examples/data/criteo/criteo_train_data" +eval_input_path: "examples/data/criteo/criteo_test_data" +model_dir: "examples/ckpt/dlrm_periodic_criteo" + +train_config { + log_step_count_steps: 500 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 20000 + sync_replicas: True +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + separator: "\t" + input_fields: { + input_name: "label" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F1" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F2" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F3" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F4" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F5" + 
input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F6" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F7" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F8" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F9" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F10" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F11" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F12" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F13" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "C1" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C2" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C3" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C4" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C5" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C6" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C7" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C8" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C9" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C10" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C11" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C12" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C13" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C14" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C15" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C16" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C17" + input_type: 
STRING + default_val:"" + } + input_fields: { + input_name: "C18" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C19" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C20" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C21" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C22" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C23" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C24" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C25" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C26" + input_type: STRING + default_val:"" + } + label_fields: "label" + + batch_size: 4096 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput +} + +feature_config: { + features: { + input_names: "F1" + feature_type: RawFeature + min_val:0.0 + max_val: 5775.0 + } + features: { + input_names: "F2" + feature_type: RawFeature + min_val: -3.0 + max_val: 257675.0 + } + features: { + input_names: "F3" + feature_type: RawFeature + min_val: 0.0 + max_val: 65535.0 + } + features: { + input_names: "F4" + feature_type: RawFeature + min_val: 0.0 + max_val: 969.0 + } + features: { + input_names: "F5" + feature_type: RawFeature + min_val: 0.0 + max_val: 23159456.0 + } + features: { + input_names: "F6" + feature_type: RawFeature + min_val: 0.0 + max_val: 431037.0 + } + features: { + input_names: "F7" + feature_type: RawFeature + min_val: 0.0 + max_val: 56311.0 + } + features: { + input_names: "F8" + feature_type: RawFeature + min_val: 0.0 + max_val: 6047.0 + } + features: { + input_names: "F9" + feature_type: RawFeature + min_val: 0.0 + max_val: 29019.0 + } + features: { + input_names: "F10" + feature_type: RawFeature + min_val: 0.0 + max_val: 46.0 + } + features: { + input_names: "F11" + feature_type: RawFeature + min_val: 0.0 + max_val: 231.0 + } + features: { + input_names: "F12" + 
feature_type: RawFeature + min_val: 0.0 + max_val: 4008.0 + } + features: { + input_names: "F13" + feature_type: RawFeature + min_val: 0.0 + max_val: 7393.0 + } + features: { + input_names: "C1" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C2" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C3" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C4" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C5" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C6" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C7" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C8" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C9" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C10" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C11" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C12" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C13" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C14" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C15" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C16" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C17" + 
hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C18" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C19" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C20" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C21" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C22" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C23" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C24" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + }features: { + input_names: "C25" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C26" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } +} +model_config: { + model_name: 'dlrm with periodic' + model_class: 'RankModel' + feature_groups: { + group_name: "dense" + feature_names: "F1" + feature_names: "F2" + feature_names: "F3" + feature_names: "F4" + feature_names: "F5" + feature_names: "F6" + feature_names: "F7" + feature_names: "F8" + feature_names: "F9" + feature_names: "F10" + feature_names: "F11" + feature_names: "F12" + feature_names: "F13" + wide_deep:DEEP + } + feature_groups: { + group_name: "sparse" + feature_names: "C1" + feature_names: "C2" + feature_names: "C3" + feature_names: "C4" + feature_names: "C5" + feature_names: "C6" + feature_names: "C7" + feature_names: "C8" + feature_names: "C9" + feature_names: "C10" + feature_names: "C11" + feature_names: "C12" + feature_names: "C13" + feature_names: "C14" + feature_names: "C15" + feature_names: "C16" + feature_names: "C17" + feature_names: "C18" + feature_names: "C19" + 
feature_names: "C20" + feature_names: "C21" + feature_names: "C22" + feature_names: "C23" + feature_names: "C24" + feature_names: "C25" + feature_names: "C26" + wide_deep:DEEP + } + backbone { + blocks { + name: 'num_emb' + inputs { + feature_group_name: 'dense' + } + keras_layer { + class_name: 'PeriodicEmbedding' + st_params { + fields { + key: "output_tensor_list" + value { bool_value: true } + } + fields { + key: "embedding_dim" + value { number_value: 16 } + } + fields { + key: "sigma" + value { number_value: 0.005 } + } + } + } + } + blocks { + name: 'sparse' + inputs { + feature_group_name: 'sparse' + } + input_layer { + output_2d_tensor_and_feature_list: true + } + } + blocks { + name: 'dot' + inputs { + block_name: 'num_emb' + input_fn: 'lambda x: x[1]' + } + inputs { + block_name: 'sparse' + input_fn: 'lambda x: x[1]' + } + keras_layer { + class_name: 'DotInteraction' + } + } + blocks { + name: 'sparse_2d' + inputs { + block_name: 'sparse' + input_fn: 'lambda x: x[0]' + } + } + blocks { + name: 'num_emb_2d' + inputs { + block_name: 'num_emb' + input_fn: 'lambda x: x[0]' + } + } + concat_blocks: ['num_emb_2d', 'dot', 'sparse_2d'] + top_mlp { + hidden_units: [256, 128, 64] + } + } + rank_model { + l2_regularization: 1e-5 + } + embedding_regularization: 1e-5 +} diff --git a/examples/configs/dlrm_standard_on_criteo.config b/examples/configs/dlrm_standard_on_criteo.config new file mode 100644 index 000000000..df82e7990 --- /dev/null +++ b/examples/configs/dlrm_standard_on_criteo.config @@ -0,0 +1,569 @@ +train_input_path: "examples/data/criteo/criteo_train_data" +eval_input_path: "examples/data/criteo/criteo_test_data" +model_dir: "examples/ckpt/dlrm_standard_criteo" + +train_config { + log_step_count_steps: 500 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + 
save_checkpoints_steps: 20000 + sync_replicas: True +} + +eval_config { + metrics_set: { + auc {} + } +} + +data_config { + separator: "\t" + input_fields: { + input_name: "label" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F1" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F2" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F3" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F4" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F5" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F6" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F7" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F8" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F9" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F10" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F11" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F12" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "F13" + input_type: FLOAT + default_val:"0" + } + input_fields: { + input_name: "C1" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C2" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C3" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C4" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C5" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C6" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C7" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C8" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C9" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: 
"C10" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C11" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C12" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C13" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C14" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C15" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C16" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C17" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C18" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C19" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C20" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C21" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C22" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C23" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C24" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C25" + input_type: STRING + default_val:"" + } + input_fields: { + input_name: "C26" + input_type: STRING + default_val:"" + } + label_fields: "label" + + batch_size: 4096 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput +} + +feature_config: { + features: { + input_names: "F1" + feature_type: RawFeature + min_val:0.0 + max_val: 5775.0 + } + features: { + input_names: "F2" + feature_type: RawFeature + min_val: -3.0 + max_val: 257675.0 + } + features: { + input_names: "F3" + feature_type: RawFeature + min_val: 0.0 + max_val: 65535.0 + } + features: { + input_names: "F4" + feature_type: RawFeature + min_val: 0.0 + max_val: 969.0 + } + features: { + input_names: "F5" + feature_type: RawFeature + min_val: 0.0 + max_val: 23159456.0 + } + features: { + input_names: "F6" + 
feature_type: RawFeature + min_val: 0.0 + max_val: 431037.0 + } + features: { + input_names: "F7" + feature_type: RawFeature + min_val: 0.0 + max_val: 56311.0 + } + features: { + input_names: "F8" + feature_type: RawFeature + min_val: 0.0 + max_val: 6047.0 + } + features: { + input_names: "F9" + feature_type: RawFeature + min_val: 0.0 + max_val: 29019.0 + } + features: { + input_names: "F10" + feature_type: RawFeature + min_val: 0.0 + max_val: 46.0 + } + features: { + input_names: "F11" + feature_type: RawFeature + min_val: 0.0 + max_val: 231.0 + } + features: { + input_names: "F12" + feature_type: RawFeature + min_val: 0.0 + max_val: 4008.0 + } + features: { + input_names: "F13" + feature_type: RawFeature + min_val: 0.0 + max_val: 7393.0 + } + features: { + input_names: "C1" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C2" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C3" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C4" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C5" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C6" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C7" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C8" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C9" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C10" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C11" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: 
{ + input_names: "C12" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C13" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C14" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C15" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C16" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C17" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C18" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C19" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C20" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C21" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C22" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C23" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C24" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + }features: { + input_names: "C25" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } + features: { + input_names: "C26" + hash_bucket_size: 1000000 + feature_type: IdFeature + embedding_dim: 16 + } +} +model_config: { + model_name: 'Standard DLRM' + model_class: 'RankModel' + feature_groups: { + group_name: "dense" + feature_names: "F1" + feature_names: "F2" + feature_names: "F3" + feature_names: "F4" + feature_names: "F5" + feature_names: "F6" + feature_names: "F7" + feature_names: "F8" + feature_names: "F9" + 
feature_names: "F10" + feature_names: "F11" + feature_names: "F12" + feature_names: "F13" + wide_deep:DEEP + } + feature_groups: { + group_name: "sparse" + feature_names: "C1" + feature_names: "C2" + feature_names: "C3" + feature_names: "C4" + feature_names: "C5" + feature_names: "C6" + feature_names: "C7" + feature_names: "C8" + feature_names: "C9" + feature_names: "C10" + feature_names: "C11" + feature_names: "C12" + feature_names: "C13" + feature_names: "C14" + feature_names: "C15" + feature_names: "C16" + feature_names: "C17" + feature_names: "C18" + feature_names: "C19" + feature_names: "C20" + feature_names: "C21" + feature_names: "C22" + feature_names: "C23" + feature_names: "C24" + feature_names: "C25" + feature_names: "C26" + wide_deep:DEEP + } + backbone { + blocks { + name: 'bottom_mlp' + inputs { + feature_group_name: 'dense' + } + keras_layer { + class_name: 'MLP' + mlp { + hidden_units: [64, 32, 16] + } + } + } + blocks { + name: 'sparse' + inputs { + feature_group_name: 'sparse' + } + input_layer { + only_output_feature_list: true + } + } + blocks { + name: 'dot' + inputs { + block_name: 'bottom_mlp' + input_fn: 'lambda x: [x]' + } + inputs { + block_name: 'sparse' + } + keras_layer { + class_name: 'DotInteraction' + } + } + concat_blocks: ['bottom_mlp', 'dot'] + top_mlp { + hidden_units: [256, 128, 64] + } + } + rank_model { + l2_regularization: 1e-5 + } + embedding_regularization: 1e-5 +} diff --git a/examples/configs/fibinet_on_movielens.config b/examples/configs/fibinet_on_movielens.config new file mode 100644 index 000000000..1fe36aac3 --- /dev/null +++ b/examples/configs/fibinet_on_movielens.config @@ -0,0 +1,204 @@ +train_input_path: "examples/data/movielens_1m/movies_train_data" +eval_input_path: "examples/data/movielens_1m/movies_test_data" +model_dir: "examples/ckpt/fibinet_on_movieslen_ckpt" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + 
initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 2000 + sync_replicas: False +} + +eval_config { + metrics_set: { + auc {} + } + metrics_set: { + gauc { + uid_field: 'user_id' + } + } + metrics_set: { + max_f1 {} + } +} + +data_config { + input_fields { + input_name:'label' + input_type: INT32 + } + input_fields { + input_name:'user_id' + input_type: INT32 + } + input_fields { + input_name: 'movie_id' + input_type: INT32 + } + input_fields { + input_name:'rating' + input_type: INT32 + } + input_fields { + input_name: 'gender' + input_type: INT32 + } + input_fields { + input_name: 'age' + input_type: INT32 + } + input_fields { + input_name: 'job_id' + input_type: INT32 + } + input_fields { + input_name: 'zip_id' + input_type: STRING + } + input_fields { + input_name: 'title' + input_type: STRING + } + input_fields { + input_name: 'genres' + input_type: STRING + } + input_fields { + input_name: 'year' + input_type: INT32 + } + + label_fields: 'label' + batch_size: 1024 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput + separator: '\t' +} + +feature_config: { + features: { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 12000 + } + features: { + input_names: 'movie_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 6000 + } + features: { + input_names: 'gender' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 2 + } + features: { + input_names: 'job_id' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 21 + } + features: { + input_names: 'age' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 7 + } + features: { + input_names: 'genres' + feature_type: TagFeature + separator: '|' + embedding_dim: 16 + hash_bucket_size: 100 + } + features: { + input_names: 'title' + feature_type: SequenceFeature + separator: ' ' + embedding_dim: 16 + 
hash_bucket_size: 10000 + sequence_combiner: { + text_cnn: { + filter_sizes: [2, 3, 4] + num_filters: [16, 8, 8] + } + } + } + features: { + input_names: 'year' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 36 + } +} +model_config: { + model_name: 'FiBiNet' + model_class: 'RankModel' + feature_groups: { + group_name: 'all' + feature_names: 'user_id' + feature_names: 'movie_id' + feature_names: 'job_id' + feature_names: 'age' + feature_names: 'gender' + feature_names: 'year' + feature_names: 'genres' + wide_deep: DEEP + } + backbone { + blocks { + name: "all" + inputs { + feature_group_name: "all" + } + input_layer { + do_batch_norm: true + only_output_feature_list: true + } + } + blocks { + name: "fibinet" + inputs { + block_name: "all" + } + keras_layer { + class_name: 'FiBiNet' + fibinet { + senet { + reduction_ratio: 4 + } + bilinear { + type: 'each' + num_output_units: 512 + } + mlp { + hidden_units: [512, 256] + } + } + } + } + concat_blocks: ['fibinet'] + } + rank_model { + } + embedding_regularization: 1e-4 +} +export_config { + multi_placeholder: false +} diff --git a/examples/configs/masknet_on_movielens.config b/examples/configs/masknet_on_movielens.config new file mode 100644 index 000000000..fd3dc1342 --- /dev/null +++ b/examples/configs/masknet_on_movielens.config @@ -0,0 +1,199 @@ +train_input_path: "examples/data/movielens_1m/movies_train_data" +eval_input_path: "examples/data/movielens_1m/movies_test_data" +model_dir: "examples/ckpt/masknet_on_movieslen_ckpt" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 2000 + sync_replicas: True +} + +eval_config { + metrics_set: { + auc {} + } + metrics_set: { + gauc { + uid_field: 'user_id' + } + } + metrics_set: { + max_f1 {} + } +} + 
+data_config { + input_fields { + input_name:'label' + input_type: INT32 + } + input_fields { + input_name:'user_id' + input_type: INT32 + } + input_fields { + input_name: 'movie_id' + input_type: INT32 + } + input_fields { + input_name:'rating' + input_type: INT32 + } + input_fields { + input_name: 'gender' + input_type: INT32 + } + input_fields { + input_name: 'age' + input_type: INT32 + } + input_fields { + input_name: 'job_id' + input_type: INT32 + } + input_fields { + input_name: 'zip_id' + input_type: STRING + } + input_fields { + input_name: 'title' + input_type: STRING + } + input_fields { + input_name: 'genres' + input_type: STRING + } + input_fields { + input_name: 'year' + input_type: INT32 + } + + label_fields: 'label' + batch_size: 1024 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput + separator: '\t' +} + +feature_config: { + features: { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 12000 + } + features: { + input_names: 'movie_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 6000 + } + features: { + input_names: 'gender' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 2 + } + features: { + input_names: 'job_id' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 21 + } + features: { + input_names: 'age' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 7 + } + features: { + input_names: 'genres' + feature_type: TagFeature + separator: '|' + embedding_dim: 16 + hash_bucket_size: 100 + } + features: { + input_names: 'title' + feature_type: SequenceFeature + separator: ' ' + embedding_dim: 16 + hash_bucket_size: 10000 + sequence_combiner: { + text_cnn: { + filter_sizes: [2, 3, 4] + num_filters: [16, 8, 8] + } + } + } + features: { + input_names: 'year' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 36 + } +} +model_config: { + model_name: 'MaskNet' + model_class: 'RankModel' + feature_groups: { + group_name: 'all' + feature_names: 
'user_id' + feature_names: 'movie_id' + feature_names: 'job_id' + feature_names: 'age' + feature_names: 'gender' + feature_names: 'year' + feature_names: 'genres' + wide_deep: DEEP + } + backbone { + blocks { + name: "mask_net" + inputs { + feature_group_name: "all" + } + keras_layer { + class_name: 'MaskNet' + masknet { + mask_blocks { + aggregation_size: 512 + output_size: 256 + } + mask_blocks { + aggregation_size: 512 + output_size: 256 + } + mask_blocks { + aggregation_size: 512 + output_size: 256 + } + mlp { + hidden_units: [512, 256] + } + } + } + } + concat_blocks: ['mask_net'] + } + rank_model { + } + embedding_regularization: 1e-4 +} +export_config { + multi_placeholder: false +} diff --git a/examples/configs/mlp_on_movielens.config b/examples/configs/mlp_on_movielens.config new file mode 100644 index 000000000..038b02a51 --- /dev/null +++ b/examples/configs/mlp_on_movielens.config @@ -0,0 +1,239 @@ +train_input_path: "examples/data/movielens_1m/movies_train_data" +eval_input_path: "examples/data/movielens_1m/movies_test_data" +model_dir: "examples/ckpt/mlp_movieslen" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 2000 + sync_replicas: True +} + +eval_config { + metrics_set: { + auc {} + } + metrics_set: { + gauc { + uid_field: 'user_id' + } + } + metrics_set: { + max_f1 {} + } +} + +data_config { + input_fields { + input_name:'label' + input_type: INT32 + } + input_fields { + input_name:'user_id' + input_type: INT32 + } + input_fields { + input_name: 'movie_id' + input_type: INT32 + } + input_fields { + input_name:'rating' + input_type: INT32 + } + input_fields { + input_name: 'gender' + input_type: INT32 + } + input_fields { + input_name: 'age' + input_type: INT32 + } + input_fields { + 
input_name: 'job_id' + input_type: INT32 + } + input_fields { + input_name: 'zip_id' + input_type: STRING + } + input_fields { + input_name: 'title' + input_type: STRING + } + input_fields { + input_name: 'genres' + input_type: STRING + } + input_fields { + input_name: 'year' + input_type: INT32 + } + + label_fields: 'label' + batch_size: 1024 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput + separator: '\t' +} + +feature_config: { + features: { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 12000 + } + features: { + input_names: 'movie_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 6000 + } + features: { + input_names: 'gender' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 2 + } + features: { + input_names: 'job_id' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 21 + } + features: { + input_names: 'age' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 7 + } + features: { + input_names: 'genres' + feature_type: TagFeature + separator: '|' + embedding_dim: 16 + hash_bucket_size: 100 + } + features: { + input_names: 'title' + feature_type: SequenceFeature + separator: ' ' + embedding_dim: 16 + hash_bucket_size: 10000 + sequence_combiner: { + text_cnn: { + filter_sizes: [2, 3, 4] + num_filters: [16, 8, 8] + } + } + } + features: { + input_names: 'year' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 36 + } +} +model_config: { + model_class: "RankModel" + feature_groups: { + group_name: 'features' + feature_names: 'user_id' + feature_names: 'movie_id' + feature_names: 'job_id' + feature_names: 'age' + feature_names: 'gender' + feature_names: 'year' + feature_names: 'genres' + wide_deep: DEEP + } + backbone { + blocks { + name: 'mlp' + inputs { + feature_group_name: 'features' + } + layers { + keras_layer { + class_name: 'Dense' + st_params { + fields { + key: 'units' + value: { number_value: 256 } + } + fields { + key: 'activation' + 
value: { string_value: 'relu' } + } + } + } + } + layers { + keras_layer { + class_name: 'Dropout' + st_params { + fields { + key: 'rate' + value: { number_value: 0.5 } + } + } + } + } + layers { + keras_layer { + class_name: 'Dense' + st_params { + fields { + key: 'units' + value: { number_value: 256 } + } + fields { + key: 'activation' + value: { string_value: 'relu' } + } + } + } + } + layers { + keras_layer { + class_name: 'Dropout' + st_params { + fields { + key: 'rate' + value: { number_value: 0.5 } + } + } + } + } + layers { + keras_layer { + class_name: 'Dense' + st_params { + fields { + key: 'units' + value: { number_value: 1 } + } + } + } + } + } + concat_blocks: 'mlp' + } + rank_model { + l2_regularization: 1e-4 + } + embedding_regularization: 1e-4 +} diff --git a/examples/configs/multi_tower_on_movielens.config b/examples/configs/multi_tower_on_movielens.config new file mode 100644 index 000000000..a502922ae --- /dev/null +++ b/examples/configs/multi_tower_on_movielens.config @@ -0,0 +1,224 @@ +train_input_path: "examples/data/movielens_1m/movies_train_data" +eval_input_path: "examples/data/movielens_1m/movies_test_data" +model_dir: "examples/ckpt/multi_tower_movieslen" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 2000 + sync_replicas: True +} + +eval_config { + metrics_set: { + auc {} + } + metrics_set: { + gauc { + uid_field: 'user_id' + } + } + metrics_set: { + max_f1 {} + } +} + +data_config { + input_fields { + input_name:'label' + input_type: INT32 + } + input_fields { + input_name:'user_id' + input_type: INT32 + } + input_fields { + input_name: 'movie_id' + input_type: INT32 + } + input_fields { + input_name:'rating' + input_type: INT32 + } + input_fields { + input_name: 'gender' + 
input_type: INT32 + } + input_fields { + input_name: 'age' + input_type: INT32 + } + input_fields { + input_name: 'job_id' + input_type: INT32 + } + input_fields { + input_name: 'zip_id' + input_type: STRING + } + input_fields { + input_name: 'title' + input_type: STRING + } + input_fields { + input_name: 'genres' + input_type: STRING + } + input_fields { + input_name: 'year' + input_type: INT32 + } + + label_fields: 'label' + batch_size: 1024 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput + separator: '\t' +} + +feature_config: { + features: { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 12000 + } + features: { + input_names: 'movie_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 6000 + } + features: { + input_names: 'gender' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 2 + } + features: { + input_names: 'job_id' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 21 + } + features: { + input_names: 'age' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 7 + } + features: { + input_names: 'genres' + feature_type: TagFeature + separator: '|' + embedding_dim: 16 + hash_bucket_size: 100 + } + features: { + input_names: 'title' + feature_type: SequenceFeature + separator: ' ' + embedding_dim: 16 + hash_bucket_size: 10000 + sequence_combiner: { + text_cnn: { + filter_sizes: [2, 3, 4] + num_filters: [16, 8, 8] + } + } + } + features: { + input_names: 'year' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 36 + } +} +model_config: { + model_name: "multi tower" + model_class: "RankModel" + feature_groups: { + group_name: 'user' + feature_names: 'user_id' + feature_names: 'job_id' + feature_names: 'age' + feature_names: 'gender' + wide_deep: DEEP + } + feature_groups: { + group_name: 'item' + feature_names: 'movie_id' + feature_names: 'year' + feature_names: 'genres' + wide_deep: DEEP + } + backbone { + packages { + name: 'user' + blocks { + name: 
'mlp' + inputs { + feature_group_name: 'user' + } + keras_layer { + class_name: 'MLP' + mlp { + hidden_units: [256, 128] + } + } + } + concat_blocks: 'mlp' + } + packages { + name: 'item' + blocks { + name: 'mlp' + inputs { + feature_group_name: 'item' + } + keras_layer { + class_name: 'MLP' + mlp { + hidden_units: [256, 128] + } + } + } + concat_blocks: 'mlp' + } + blocks { + name: 'top_mlp' + inputs { + package_name: 'user' + } + inputs { + package_name: 'item' + } + layers { + keras_layer { + class_name: 'MLP' + mlp { + hidden_units: [128, 64] + } + } + } + } + concat_blocks: 'top_mlp' + } + rank_model { + l2_regularization: 1e-4 + } + embedding_regularization: 1e-4 +} diff --git a/examples/configs/wide_and_deep_backbone_on_movielens.config b/examples/configs/wide_and_deep_backbone_on_movielens.config new file mode 100644 index 000000000..0f13a0511 --- /dev/null +++ b/examples/configs/wide_and_deep_backbone_on_movielens.config @@ -0,0 +1,219 @@ +train_input_path: "examples/data/movielens_1m/movies_train_data" +eval_input_path: "examples/data/movielens_1m/movies_test_data" +model_dir: "examples/ckpt/wide_and_deep_movieslen" + +train_config { + log_step_count_steps: 100 + optimizer_config: { + adam_optimizer: { + learning_rate: { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 0.00001 + } + } + } + use_moving_average: false + } + save_checkpoints_steps: 2000 + sync_replicas: True +} + +eval_config { + metrics_set: { + auc {} + } + metrics_set: { + gauc { + uid_field: 'user_id' + } + } + metrics_set: { + max_f1 {} + } +} + +data_config { + input_fields { + input_name:'label' + input_type: INT32 + } + input_fields { + input_name:'user_id' + input_type: INT32 + } + input_fields { + input_name: 'movie_id' + input_type: INT32 + } + input_fields { + input_name:'rating' + input_type: INT32 + } + input_fields { + input_name: 'gender' + input_type: INT32 + } + input_fields { + input_name: 'age' 
+ input_type: INT32 + } + input_fields { + input_name: 'job_id' + input_type: INT32 + } + input_fields { + input_name: 'zip_id' + input_type: STRING + } + input_fields { + input_name: 'title' + input_type: STRING + } + input_fields { + input_name: 'genres' + input_type: STRING + } + input_fields { + input_name: 'year' + input_type: INT32 + } + + label_fields: 'label' + batch_size: 1024 + num_epochs: 1 + prefetch_size: 32 + input_type: CSVInput + separator: '\t' +} + +feature_config: { + features: { + input_names: 'user_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 12000 + } + features: { + input_names: 'movie_id' + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 6000 + } + features: { + input_names: 'gender' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 2 + } + features: { + input_names: 'job_id' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 21 + } + features: { + input_names: 'age' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 7 + } + features: { + input_names: 'genres' + feature_type: TagFeature + separator: '|' + embedding_dim: 16 + hash_bucket_size: 100 + } + features: { + input_names: 'title' + feature_type: SequenceFeature + separator: ' ' + embedding_dim: 16 + hash_bucket_size: 10000 + sequence_combiner: { + text_cnn: { + filter_sizes: [2, 3, 4] + num_filters: [16, 8, 8] + } + } + } + features: { + input_names: 'year' + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 36 + } +} +model_config: { + model_class: "RankModel" + feature_groups: { + group_name: 'wide' + feature_names: 'user_id' + feature_names: 'movie_id' + feature_names: 'job_id' + feature_names: 'age' + feature_names: 'gender' + feature_names: 'year' + feature_names: 'genres' + wide_deep: WIDE + } + feature_groups: { + group_name: 'deep' + feature_names: 'user_id' + feature_names: 'movie_id' + feature_names: 'job_id' + feature_names: 'age' + feature_names: 'gender' + feature_names: 'year' + 
feature_names: 'genres' + wide_deep: DEEP + } + backbone { + blocks { + name: 'wide' + inputs { + feature_group_name: 'wide' + } + input_layer { + only_output_feature_list: true + } + } + blocks { + name: 'deep_logit' + inputs { + feature_group_name: 'deep' + } + keras_layer { + class_name: 'MLP' + mlp { + hidden_units: [256, 256, 256, 1] + use_final_bn: false + final_activation: 'linear' + } + } + } + blocks { + name: 'final_logit' + inputs { + block_name: 'wide' + input_fn: 'lambda x: tf.add_n(x)' + } + inputs { + block_name: 'deep_logit' + } + merge_inputs_into_list: true + keras_layer { + class_name: 'Add' + } + } + concat_blocks: 'final_logit' + } + rank_model { + wide_output_dim: 1 + l2_regularization: 1e-4 + } + embedding_regularization: 1e-4 +} diff --git a/examples/data/criteo/download_and_process.sh b/examples/data/criteo/download_and_process.sh index 30061a862..f0cc8aef9 100644 --- a/examples/data/criteo/download_and_process.sh +++ b/examples/data/criteo/download_and_process.sh @@ -1,6 +1,7 @@ #! 
/bin/bash if [ "$(uname)" == "Darwin" ]; then - curl -O https://easy-rec.oss-cn-hangzhou.aliyuncs.com/data/criteo_kaggle/kaggle-display-advertising-challenge-dataset.tar.gz + #curl -O https://easy-rec.oss-cn-hangzhou.aliyuncs.com/data/criteo_kaggle/kaggle-display-advertising-challenge-dataset.tar.gz + wget -c https://easy-rec.oss-cn-hangzhou.aliyuncs.com/data/criteo_kaggle/kaggle-display-advertising-challenge-dataset.tar.gz elif [ "$(expr substr $(uname -s) 1 5)" == "Linux" ]; then wget -c https://easy-rec.oss-cn-hangzhou.aliyuncs.com/data/criteo_kaggle/kaggle-display-advertising-challenge-dataset.tar.gz elif [ "$(expr substr $(uname -s) 1 10)" == "MINGW32_NT" ]; then diff --git a/examples/data/criteo/process_criteo_kaggle.py b/examples/data/criteo/process_criteo_kaggle.py index 60b7d9776..e610e33a6 100644 --- a/examples/data/criteo/process_criteo_kaggle.py +++ b/examples/data/criteo/process_criteo_kaggle.py @@ -5,14 +5,21 @@ target_columns = ['label'] columns = target_columns + dense_features + category_features +# data_train = pd.read_csv( +# 'criteo_train_data', sep='\t', names=columns) +# +# for col in category_features: +# print(col, data_train[col].nunique()) + data_train = pd.read_csv( 'criteo_kaggle_display/train.txt', sep='\t', names=columns) samples_num = data_train.shape[0] print('samples_num:', samples_num, round(samples_num * 0.9)) -data_train[:round(samples_num * 0.9)].to_csv( +train_num = int(round(samples_num * 0.9)) +data_train[:train_num].to_csv( r'criteo_train_data', index=False, sep='\t', mode='a', header=False) -data_train[round(samples_num * 0.9):].to_csv( +data_train[train_num:].to_csv( r'criteo_test_data', index=False, sep='\t', mode='a', header=False) print('Done.') diff --git a/examples/rank_model/readme.md b/examples/rank_model/readme.md index 15d3f4dca..f6a2ba791 100644 --- a/examples/rank_model/readme.md +++ b/examples/rank_model/readme.md @@ -32,10 +32,12 @@ | MovieLens-1M | DeepFM | 0.8688 | | MovieLens-1M | DCN | 0.8576 | | 
MovieLens-1M | AutoInt | 0.8513 | +| MovieLens-1M | MaskNet | 0.8872 | +| MovieLens-1M | FibiNet | 0.8879 | # Criteo Research Kaggle 数据集 -在MovieLens-1M 数据集中, 我们提供了2个模型上的demo示例。 +在 `Criteo Research Kaggle` 数据集中, 我们提供了2个模型上的demo示例。 [FM](fm.md) / [DeepFM](deepfm.md) diff --git a/examples/readme.md b/examples/readme.md index 4861b0b42..f2c337431 100644 --- a/examples/readme.md +++ b/examples/readme.md @@ -73,14 +73,22 @@ EasyRec的模型训练和评估都是基于config配置文件的,配置文件 - [deepfm_on_movielens.config](configs/deepfm_on_movielens.config) +- [deepfm_backbone_on_movielens.config](configs/deepfm_backbone_on_movielens.config) + - [dcn_on_movielens.config](configs/dcn_on_movielens.config) - [autoint_on_movielens.config](configs/autoint_on_movielens.config) +- [masknet_on_movielens.config](configs/masknet_on_movielens.config) + +- [fibinet_on_movielens.config](configs/fibinet_on_movielens.config) + - [fm_on_criteo.config](configs/fm_on_criteo.config) - [deepfm_on_criteo.config](configs/deepfm_on_criteo.config) +- [deepfm_backbone_on_criteo.config](configs/deepfm_backbone_on_criteo.config) + **召回任务** - [dssm_on_books.config](configs/dssm_on_books.config) @@ -201,19 +209,35 @@ python -m easy_rec.python.train_eval --pipeline_config_path examples/configs/dee - MovieLens-1M - | Model | Epoch | AUC | - | --------- | ----- | ------ | - | Wide&Deep | 1 | 0.8558 | - | DeepFM | 1 | 0.8688 | - | DCN | 1 | 0.8576 | - | AutoInt | 1 | 0.8513 | + | Model | Epoch | AUC | + | ------------------- | ----- | ------ | + | MLP | 1 | 0.8616 | + | Wide&Deep | 1 | 0.8558 | + | Wide&Deep(Backbone) | 1 | 0.8854 | + | DeepFM | 1 | 0.8867 | + | DeepFM(Backbone) | 1 | 0.8872 | + | DCN | 1 | 0.8576 | + | DCN_v2 | 1 | 0.8770 | + | AutoInt | 1 | 0.8513 | + | MaskNet | 1 | 0.8872 | + | FibiNet | 1 | 0.8893 | + + 备注:`MovieLens-1M` 数据集较小,评估指标方差较大,以上结果仅供参考。 - Criteo-Research - | Model | Epoch | AUC | - | ------ | ----- | ------ | - | FM | 1 | 0.7577 | - | DeepFM | 1 | 0.7967 | + | Model | Epoch | AUC | + | ----------------- | 
----- | ------- | + | FM | 1 | 0.7577 | + | DeepFM | 1 | 0.7970 | + | DeepFM (backbone) | 1 | 0.7970 | + | DeepFM (periodic) | 1 | 0.7980 | + | DeepFM (autodis) | 1 | 0.7979 | + | DLRM | 1 | 0.79785 | + | DLRM (backbone) | 1 | 0.7993 | + | DLRM (standard) | 1 | 0.7949 | + | DLRM (autodis) | 1 | 0.7989 | + | DLRM (periodic) | 1 | 0.7998 | ### 召回模型 diff --git a/pai_jobs/run.py b/pai_jobs/run.py index 41c61ad31..986731d36 100644 --- a/pai_jobs/run.py +++ b/pai_jobs/run.py @@ -166,6 +166,8 @@ tf.app.flags.DEFINE_string('oss_embedding_version', '', 'oss embedding version') tf.app.flags.DEFINE_bool('verbose', False, 'print more debug information') +tf.app.flags.DEFINE_bool('place_embedding_on_cpu', False, + 'whether to place embedding variables on cpu') # for automl hyper parameter tuning tf.app.flags.DEFINE_string('model_dir', None, 'model directory') @@ -434,7 +436,10 @@ def main(argv): elif FLAGS.cmd == 'export': check_param('export_dir') check_param('config') - + if FLAGS.place_embedding_on_cpu: + os.environ['place_embedding_on_cpu'] = 'True' + else: + os.environ['place_embedding_on_cpu'] = 'False' redis_params = {} if FLAGS.redis_url: redis_params['redis_url'] = FLAGS.redis_url diff --git a/samples/model_config/bst_cl_on_taobao.config b/samples/model_config/bst_cl_on_taobao.config new file mode 100644 index 000000000..77529db5e --- /dev/null +++ b/samples/model_config/bst_cl_on_taobao.config @@ -0,0 +1,304 @@ +train_input_path: "data/test/tb_data/taobao_train_data" +eval_input_path: "data/test/tb_data/taobao_test_data" +model_dir: "experiments/dbmtl_taobao_ckpt" + +train_config { + optimizer_config { + adam_optimizer { + learning_rate { + exponential_decay_learning_rate { + initial_learning_rate: 0.001 + decay_steps: 1000 + decay_factor: 0.5 + min_learning_rate: 1e-07 + } + } + } + use_moving_average: false + } + num_steps: 100 + sync_replicas: true + save_checkpoints_steps: 100 + log_step_count_steps: 100 +} + +eval_config { + metrics_set { + auc { + } + } +} + 
+data_config { + batch_size: 4096 + label_fields: "clk" + label_fields: "buy" + prefetch_size: 32 + input_type: CSVInput + input_fields { + input_name: "clk" + input_type: INT32 + } + input_fields { + input_name: "buy" + input_type: INT32 + } + input_fields { + input_name: "pid" + input_type: STRING + } + input_fields { + input_name: "adgroup_id" + input_type: STRING + } + input_fields { + input_name: "cate_id" + input_type: STRING + } + input_fields { + input_name: "campaign_id" + input_type: STRING + } + input_fields { + input_name: "customer" + input_type: STRING + } + input_fields { + input_name: "brand" + input_type: STRING + } + input_fields { + input_name: "user_id" + input_type: STRING + } + input_fields { + input_name: "cms_segid" + input_type: STRING + } + input_fields { + input_name: "cms_group_id" + input_type: STRING + } + input_fields { + input_name: "final_gender_code" + input_type: STRING + } + input_fields { + input_name: "age_level" + input_type: STRING + } + input_fields { + input_name: "pvalue_level" + input_type: STRING + } + input_fields { + input_name: "shopping_level" + input_type: STRING + } + input_fields { + input_name: "occupation" + input_type: STRING + } + input_fields { + input_name: "new_user_class_level" + input_type: STRING + } + input_fields { + input_name: "tag_category_list" + input_type: STRING + } + input_fields { + input_name: "tag_brand_list" + input_type: STRING + } + input_fields { + input_name: "price" + input_type: INT32 + } +} + +feature_config: { + features { + input_names: "pid" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features { + input_names: "adgroup_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 + } + features { + input_names: "cate_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10000 + embedding_name: 'category' + } + features { + input_names: "campaign_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 
100000 + } + features { + input_names: "customer" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 + } + features { + input_names: "brand" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 + embedding_name: 'brand' + } + features { + input_names: "user_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100000 + } + features { + input_names: "cms_segid" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 + } + features { + input_names: "cms_group_id" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 100 + } + features { + input_names: "final_gender_code" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features { + input_names: "age_level" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features { + input_names: "pvalue_level" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features { + input_names: "shopping_level" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features { + input_names: "occupation" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features { + input_names: "new_user_class_level" + feature_type: IdFeature + embedding_dim: 16 + hash_bucket_size: 10 + } + features : { + input_names: 'tag_category_list' + feature_type: SequenceFeature + separator: '|' + hash_bucket_size: 10000 + embedding_dim: 16 + embedding_name: 'category' + } + features : { + input_names: 'tag_brand_list' + feature_type: SequenceFeature + separator: '|' + hash_bucket_size: 100000 + embedding_dim: 16 + embedding_name: 'brand' + } + features { + input_names: "price" + feature_type: IdFeature + embedding_dim: 16 + num_buckets: 50 + } +} + +model_config { + model_class: "DBMTL" + feature_groups { + group_name: "all" + feature_names: "user_id" + feature_names: "cms_segid" + feature_names: "cms_group_id" + feature_names: "age_level" + feature_names: 
"pvalue_level" + feature_names: "shopping_level" + feature_names: "occupation" + feature_names: "new_user_class_level" + feature_names: "adgroup_id" + feature_names: "cate_id" + feature_names: "campaign_id" + feature_names: "customer" + feature_names: "brand" + feature_names: "price" + feature_names: "pid" + wide_deep: DEEP + } + + feature_groups { + group_name: "seq" + feature_names: "brand" + feature_names: "cate_id" + feature_names: "tag_category_list" + feature_names: "tag_brand_list" + sequence_encoders { + bst { + hidden_size: 256 + num_attention_heads: 4 + num_hidden_layers: 1 + intermediate_size: 512 + hidden_act: 'gelu' + max_position_embeddings: 50 + hidden_dropout_prob: 0.1 + attention_probs_dropout_prob: 0 + need_contrastive_learning: true + } + } + wide_deep: DEEP + } + + dbmtl { + bottom_dnn { + hidden_units: [1024, 512, 256] + } + task_towers { + tower_name: "ctr" + label_name: "clk" + loss_type: CLASSIFICATION + metrics_set: { + auc {} + } + dnn { + hidden_units: [256, 128, 64, 32] + } + relation_dnn { + hidden_units: [32] + } + weight: 1.0 + } + l2_regularization: 1e-6 + use_sequence_encoder: true + } + embedding_regularization: 5e-6 +}