15 | 15 | import paddle |
16 | 16 | import contextlib |
17 | 17 | import numpy as np |
| 18 | +from paddle import _C_ops |
| 19 | +from paddle.fluid import core |
| 20 | +from paddle.fluid.data_feeder import check_variable_and_dtype |
| 21 | +from paddle.fluid.framework import in_dygraph_mode, default_main_program |
| 22 | +from paddle.fluid.layer_helper import LayerHelper |
18 | 23 | |
19 | 24 | __all__ = [] |
20 | 25 | |
@@ -93,3 +98,135 @@ def model_parallel_random_seed(seed=None): |
93 | 98 | RNG_STATE_TRACKER.reset() |
94 | 99 | RNG_STATE_TRACKER.add(MODEL_PARALLEL_RNG, local_seed) |
95 | 100 | paddle.seed(global_seed) |
| 101 | + |
| 102 | + |
| 103 | +def determinate_seed(rng_name): |
| 104 | + assert rng_name is not None and rng_name != "" |
| 105 | + helper = LayerHelper('seed', **locals()) |
| 106 | + out = helper.create_variable_for_type_inference(dtype=paddle.int32) |
| 107 | + # set force_cpu to avoid the CPU->GPU->CPU sync copy and to reduce pipeline hangs |
| 108 | + helper.append_op( |
| 109 | + type='seed', |
| 110 | + outputs={'Out': out}, |
| 111 | + attrs={'deterministic': True, |
| 112 | + 'rng_name': rng_name, |
| 113 | + 'force_cpu': True}) |
| 114 | + return out |
| 115 | + |
| 116 | + |
| 117 | +def dropout(x, |
| 118 | + p=0.5, |
| 119 | + axis=None, |
| 120 | + rng_name=None, |
| 121 | + training=True, |
| 122 | + mode="upscale_in_train", |
| 123 | + name=None): |
| 124 | + """ |
| 125 | + Dropout is a regularization technique for reducing overfitting by preventing |
| 126 | + neuron co-adaptation during training. The dropout operator randomly sets the |
| 127 | + outputs of some units to zero, while upscaling the others according to the given |
| 128 | + dropout probability. |
| 129 | + |
| 130 | + Args: |
| 131 | + x (Tensor): The input tensor. The data type is float32 or float64. |
| 132 | + p (float|int): Probability of setting units to zero. Default 0.5. |
| 133 | + axis (int|list|tuple): The axis along which the dropout is performed. Default None. |
| 134 | + rng_name (str): The name of the random seed generator, which is used to obtain deterministic results. |
| 135 | + training (bool): A flag indicating whether it is in the training phase or not. Default True. |
| 136 | + mode (str): ['upscale_in_train' (default) | 'downscale_in_infer']. |
| 137 | + |
| 138 | + 1. upscale_in_train(default), upscale the output at training time |
| 139 | + |
| 140 | + - train: out = input * mask / ( 1.0 - dropout_prob ) |
| 141 | + - inference: out = input |
| 142 | + |
| 143 | + 2. downscale_in_infer, downscale the output at inference |
| 144 | + |
| 145 | + - train: out = input * mask |
| 146 | + - inference: out = input * (1.0 - dropout_prob) |
| 147 | + name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. |
| 148 | + |
| 149 | + Returns: |
| 150 | + A Tensor representing the dropout result, which has the same shape and data type as `x`. |
| 151 | + |
| 152 | + |
| 153 | + Examples: |
| 154 | + We use ``p=0.5`` in the following description for simplicity. |
| 155 | + |
| 156 | + 1. When ``axis=None``, this is the commonly used dropout, which drops each element of x randomly. |
| 157 | + |
| 158 | + .. code-block:: text |
| 159 | + |
| 160 | + Let's see a simple case when x is a 2d tensor with shape 2*3: |
| 161 | + [[1 2 3] |
| 162 | + [4 5 6]] |
| 163 | + we generate a mask with the same shape as x, which is 2*3. Each value of the mask is |
| 164 | + sampled randomly from a Bernoulli distribution. For example, we may get such a mask: |
| 165 | + [[0 1 0] |
| 166 | + [1 0 1]] |
| 167 | + So the output is obtained from the elementwise multiplication of x and mask: |
| 168 | + [[0 2 0] |
| 169 | + [4 0 6]] |
| 170 | + Using the default setting, i.e. ``mode='upscale_in_train'``, |
| 171 | + if in the training phase, the final upscaled output is: |
| 172 | + [[0 4 0 ] |
| 173 | + [8 0 12]] |
| 174 | + if in the test phase, the output is the same as the input: |
| 175 | + [[1 2 3] |
| 176 | + [4 5 6]] |
| 177 | + we can also set ``mode='downscale_in_infer'``, then |
| 178 | + if in the training phase, the final output is: |
| 179 | + [[0 2 0] |
| 180 | + [4 0 6]] |
| 181 | + if in the test phase, the scaled output is: |
| 182 | + [[0.5 1. 1.5] |
| 183 | + [2. 2.5 3. ]] |
| 184 | + |
| 185 | + """ |
| 186 | + if rng_name is None: |
| 187 | + return paddle.nn.functional.dropout(x, p, axis, training, mode, name) |
| 188 | + |
| 189 | + # fast return for p == 0 |
| 190 | + if p == 0: return x |
| 191 | + |
| 192 | + assert isinstance(p, (float, int)), \ |
| 193 | + TypeError("p argument should be a number") |
| 194 | + assert 0 <= p <= 1, ValueError("p argument should between 0 and 1") |
| 195 | + assert mode in ('downscale_in_infer', 'upscale_in_train'), \ |
| 196 | + ValueError( |
| 197 | + "mode argument should be 'downscale_in_infer' or 'upscale_in_train'") |
| 198 | + |
| 199 | + assert axis is None, \ |
| 200 | + TypeError("unsupport axis when using random seed generator") |
| 201 | + |
| 202 | + mode = 'downgrade_in_infer' if mode == 'downscale_in_infer' else mode  # semantic transfer |
| 203 | + |
| 204 | + # dygraph uses the tracker, so it doesn't need a determinate seed |
| 205 | + if in_dygraph_mode(): |
| 206 | + out, mask = _C_ops.dropout(x, 'dropout_prob', p, 'is_test', |
| 207 | + not training, 'fix_seed', False, 'seed', 0, |
| 208 | + 'dropout_implementation', mode) |
| 209 | + return out |
| 210 | + |
| 211 | + seed = determinate_seed(rng_name) |
| 212 | + |
| 213 | + helper = LayerHelper('dropout', **locals()) |
| 214 | + check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], |
| 215 | + 'dropout') |
| 216 | + |
| 217 | + out = helper.create_variable_for_type_inference(dtype=x.dtype) |
| 218 | + mask = helper.create_variable_for_type_inference( |
| 219 | + dtype=core.VarDesc.VarType.UINT8, stop_gradient=True) |
| 220 | + |
| 221 | + helper.append_op( |
| 222 | + type='dropout', |
| 223 | + inputs={'X': [x], |
| 224 | + 'Seed': seed}, |
| 225 | + outputs={'Out': [out], |
| 226 | + 'Mask': [mask]}, |
| 227 | + attrs={ |
| 228 | + 'dropout_prob': p, |
| 229 | + 'is_test': not training, |
| 230 | + 'dropout_implementation': mode, |
| 231 | + }) |
| 232 | + return out |
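
For context, a hedged usage sketch of the new API (not part of the commit): the import path and seed values below are assumptions for illustration, and `RNG_STATE_TRACKER` is the module-level tracker referenced in the hunk above; since the tracker manages CUDA RNG state in upstream Paddle, a CUDA-enabled build is assumed.

```python
# Hedged usage sketch; the import path and seeds are assumptions, not commit content.
import paddle
from paddle.distributed.fleet.meta_parallel.parallel_layers import random as mp_random

paddle.seed(2021)
# Register a named generator state in the tracker; the name can then be passed
# to dropout via rng_name. With rng_name=None the wrapper simply falls back to
# paddle.nn.functional.dropout.
mp_random.RNG_STATE_TRACKER.add("local_seed", 123)

x = paddle.ones([4, 8], dtype="float32")
y = mp_random.dropout(x, p=0.5, rng_name="local_seed", training=True)
print(y.shape)  # [4, 8]
```
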