From 066556323d8855aafd65a42c703b059721e85f88 Mon Sep 17 00:00:00 2001 From: zhouwei25 Date: Tue, 10 Nov 2020 07:07:43 +0000 Subject: [PATCH 1/2] make Numpy version is below 1.19.3 --- python/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/requirements.txt b/python/requirements.txt index 138220b405748a..12f36b37085737 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,6 +1,7 @@ requests>=2.20.0 numpy>=1.13, <=1.16.4 ; python_version<"3.5" -numpy>=1.13 ; python_version>="3.5" +numpy>=1.13 ; python_version>="3.5" and platform_system != "Windows" +numpy>=1.13, <=1.19.3 ; python_version>="3.5" and platform_system == "Windows" protobuf>=3.1.0 gast==0.3.3 scipy>=0.19.0, <=1.2.1 ; python_version<"3.5" From 725d83bc604df3e9c9a7466e80f379037a49d29b Mon Sep 17 00:00:00 2001 From: zhouwei25 Date: Tue, 10 Nov 2020 15:54:09 +0000 Subject: [PATCH 2/2] fix 2.0 doc --- paddle/fluid/operators/unique_op.cu | 2 +- paddle/fluid/pybind/imperative.cc | 3 - python/paddle/framework/__init__.py | 6 +- python/paddle/framework/io.py | 4 - python/paddle/optimizer/lr.py | 115 ++++++++++++++++------------ 5 files changed, 70 insertions(+), 60 deletions(-) diff --git a/paddle/fluid/operators/unique_op.cu b/paddle/fluid/operators/unique_op.cu index 848df4c7aba8d8..d22406f27c4702 100644 --- a/paddle/fluid/operators/unique_op.cu +++ b/paddle/fluid/operators/unique_op.cu @@ -177,7 +177,7 @@ static void UniqueFlattendCUDATensor(const framework::ExecutionContext& context, thrust::sort_by_key(thrust::device, in_data_hat, in_data_hat + num_input, sorted_indices_data); - // 1. Calculate op result: 'out': + // 1. Calculate op result: 'out' Tensor range; range.Resize(framework::make_ddim({num_input + 1})); auto range_data_ptr = range.mutable_data(context.GetPlace()); diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 4d68afeede4e51..0cce83318eab37 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -685,8 +685,6 @@ void BindImperative(py::module *m_ptr) { .. code-block:: python import paddle - paddle.disable_static() - linear = Linear(32, 64) data = paddle.uniform(shape=[30, 10, 32], -1, 1) x = linear(data) @@ -704,7 +702,6 @@ void BindImperative(py::module *m_ptr) { .. code-block:: python import paddle - paddle.disable_static() inputs = [] for _ in range(10): diff --git a/python/paddle/framework/__init__.py b/python/paddle/framework/__init__.py index 3d06b4ab911ac4..a7c073ccfb28b0 100644 --- a/python/paddle/framework/__init__.py +++ b/python/paddle/framework/__init__.py @@ -18,10 +18,7 @@ 'get_default_dtype', 'set_default_dtype' ] -__all__ += [ - 'grad', 'LayerList', 'load', 'save', 'to_variable', 'no_grad', - 'DataParallel' -] +__all__ += ['grad', 'LayerList', 'load', 'save', 'no_grad', 'DataParallel'] from . import random from .random import seed @@ -39,7 +36,6 @@ from paddle.fluid import core #DEFINE_ALIAS from ..fluid.dygraph.base import no_grad #DEFINE_ALIAS -from ..fluid.dygraph.base import to_variable #DEFINE_ALIAS from ..fluid.dygraph.base import grad #DEFINE_ALIAS from .io import save from .io import load diff --git a/python/paddle/framework/io.py b/python/paddle/framework/io.py index 7e8c717bb1deb7..945c8160b47fbd 100644 --- a/python/paddle/framework/io.py +++ b/python/paddle/framework/io.py @@ -225,8 +225,6 @@ def save(obj, path): import paddle - paddle.disable_static() - emb = paddle.nn.Embedding(10, 10) layer_state_dict = emb.state_dict() paddle.save(layer_state_dict, "emb.pdparams") @@ -318,8 +316,6 @@ def load(path, **configs): .. code-block:: python import paddle - - paddle.disable_static() emb = paddle.nn.Embedding(10, 10) layer_state_dict = emb.state_dict() diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index 80b4b2a9d05626..e4fb54c229f212 100644 --- a/python/paddle/optimizer/lr.py +++ b/python/paddle/optimizer/lr.py @@ -226,14 +226,15 @@ class NoamDecay(LRScheduler): scheduler = paddle.optimizer.lr.NoamDecay(d_model=0.01, warmup_steps=100, verbose=True) sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) for epoch in range(20): - for batch_id in range(2): + for batch_id in range(5): x = paddle.uniform([10, 10]) out = linear(x) loss = paddle.mean(out) loss.backward() sgd.step() sgd.clear_gradients() - scheduler.step() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch # train on static graph mode paddle.enable_static() @@ -251,7 +252,7 @@ class NoamDecay(LRScheduler): exe = paddle.static.Executor() exe.run(start_prog) for epoch in range(20): - for batch_id in range(2): + for batch_id in range(5): out = exe.run( main_prog, feed={ @@ -259,7 +260,8 @@ class NoamDecay(LRScheduler): 'y': np.random.randn(3, 4, 5).astype('float32') }, fetch_list=loss.name) - scheduler.step() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch """ @@ -322,14 +324,15 @@ class PiecewiseDecay(LRScheduler): scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=[3, 6, 9], values=[0.1, 0.2, 0.3, 0.4], verbose=True) sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) for epoch in range(20): - for batch_id in range(2): + for batch_id in range(5): x = paddle.uniform([10, 10]) out = linear(x) loss = paddle.mean(out) loss.backward() sgd.step() sgd.clear_gradients() - scheduler.step() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch # train on static graph mode paddle.enable_static() @@ -347,7 +350,7 @@ class PiecewiseDecay(LRScheduler): exe = paddle.static.Executor() exe.run(start_prog) for epoch in range(20): - for batch_id in range(2): + for batch_id in range(5): out = exe.run( main_prog, feed={ @@ -355,7 +358,8 @@ class PiecewiseDecay(LRScheduler): 'y': np.random.randn(3, 4, 5).astype('float32') }, fetch_list=loss.name) - scheduler.step() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch """ def __init__(self, boundaries, values, last_epoch=-1, verbose=False): @@ -403,14 +407,15 @@ class NaturalExpDecay(LRScheduler): scheduler = paddle.optimizer.lr.NaturalExpDecay(learning_rate=0.5, gamma=0.1, verbose=True) sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) for epoch in range(20): - for batch_id in range(2): + for batch_id in range(5): x = paddle.uniform([10, 10]) out = linear(x) loss = paddle.mean(out) loss.backward() sgd.step() sgd.clear_gradients() - scheduler.step() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch # train on static graph mode paddle.enable_static() @@ -428,7 +433,7 @@ class NaturalExpDecay(LRScheduler): exe = paddle.static.Executor() exe.run(start_prog) for epoch in range(20): - for batch_id in range(2): + for batch_id in range(5): out = exe.run( main_prog, feed={ @@ -436,7 +441,8 @@ class NaturalExpDecay(LRScheduler): 'y': np.random.randn(3, 4, 5).astype('float32') }, fetch_list=loss.name) - scheduler.step() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch """ def __init__(self, learning_rate, gamma, last_epoch=-1, verbose=False): @@ -481,14 +487,15 @@ class InverseTimeDecay(LRScheduler): scheduler = paddle.optimizer.lr.InverseTimeDecay(learning_rate=0.5, gamma=0.1, verbose=True) sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) for epoch in range(20): - for batch_id in range(2): + for batch_id in range(5): x = paddle.uniform([10, 10]) out = linear(x) loss = paddle.mean(out) loss.backward() sgd.step() sgd.clear_gradients() - scheduler.step() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch # train on static graph mode paddle.enable_static() @@ -506,7 +513,7 @@ class InverseTimeDecay(LRScheduler): exe = paddle.static.Executor() exe.run(start_prog) for epoch in range(20): - for batch_id in range(2): + for batch_id in range(5): out = exe.run( main_prog, feed={ @@ -514,7 +521,8 @@ class InverseTimeDecay(LRScheduler): 'y': np.random.randn(3, 4, 5).astype('float32') }, fetch_list=loss.name) - scheduler.step() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch """ @@ -576,14 +584,15 @@ class PolynomialDecay(LRScheduler): scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.5, decay_steps=20, verbose=True) sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) for epoch in range(20): - for batch_id in range(2): + for batch_id in range(5): x = paddle.uniform([10, 10]) out = linear(x) loss = paddle.mean(out) loss.backward() sgd.step() sgd.clear_gradients() - scheduler.step() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch # train on static graph mode paddle.enable_static() @@ -601,7 +610,7 @@ class PolynomialDecay(LRScheduler): exe = paddle.static.Executor() exe.run(start_prog) for epoch in range(20): - for batch_id in range(2): + for batch_id in range(5): out = exe.run( main_prog, feed={ @@ -609,7 +618,8 @@ class PolynomialDecay(LRScheduler): 'y': np.random.randn(3, 4, 5).astype('float32') }, fetch_list=loss.name) - scheduler.step() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch """ def __init__(self, @@ -691,14 +701,15 @@ class LinearWarmup(LRScheduler): learning_rate=0.5, warmup_steps=20, start_lr=0, end_lr=0.5, verbose=True) sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) for epoch in range(20): - for batch_id in range(2): + for batch_id in range(5): x = paddle.uniform([10, 10]) out = linear(x) loss = paddle.mean(out) loss.backward() sgd.step() sgd.clear_gradients() - scheduler.step() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch # train on static graph mode paddle.enable_static() @@ -717,7 +728,7 @@ class LinearWarmup(LRScheduler): exe = paddle.static.Executor() exe.run(start_prog) for epoch in range(20): - for batch_id in range(2): + for batch_id in range(5): out = exe.run( main_prog, feed={ @@ -725,7 +736,8 @@ class LinearWarmup(LRScheduler): 'y': np.random.randn(3, 4, 5).astype('float32') }, fetch_list=loss.name) - scheduler.step() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch """ def __init__(self, @@ -814,14 +826,15 @@ class ExponentialDecay(LRScheduler): scheduler = paddle.optimizer.lr.ExponentialDecay(learning_rate=0.5, gamma=0.9, verbose=True) sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) for epoch in range(20): - for batch_id in range(2): + for batch_id in range(5): x = paddle.uniform([10, 10]) out = linear(x) loss = paddle.mean(out) loss.backward() sgd.step() sgd.clear_gradients() - scheduler.step() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch # train on static graph mode paddle.enable_static() @@ -839,7 +852,7 @@ class ExponentialDecay(LRScheduler): exe = paddle.static.Executor() exe.run(start_prog) for epoch in range(20): - for batch_id in range(2): + for batch_id in range(5): out = exe.run( main_prog, feed={ @@ -847,7 +860,8 @@ class ExponentialDecay(LRScheduler): 'y': np.random.randn(3, 4, 5).astype('float32') }, fetch_list=loss.name) - scheduler.step() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch """ def __init__(self, learning_rate, gamma, last_epoch=-1, verbose=False): @@ -901,14 +915,15 @@ class MultiStepDecay(LRScheduler): scheduler = paddle.optimizer.lr.MultiStepDecay(learning_rate=0.5, milestones=[2, 4, 6], gamma=0.8, verbose=True) sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) for epoch in range(20): - for batch_id in range(2): + for batch_id in range(5): x = paddle.uniform([10, 10]) out = linear(x) loss = paddle.mean(out) loss.backward() sgd.step() sgd.clear_gradients() - scheduler.step() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch # train on static graph mode paddle.enable_static() @@ -926,7 +941,7 @@ class MultiStepDecay(LRScheduler): exe = paddle.static.Executor() exe.run(start_prog) for epoch in range(20): - for batch_id in range(2): + for batch_id in range(5): out = exe.run( main_prog, feed={ @@ -934,7 +949,8 @@ class MultiStepDecay(LRScheduler): 'y': np.random.randn(3, 4, 5).astype('float32') }, fetch_list=loss.name) - scheduler.step() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch """ def __init__(self, @@ -1008,14 +1024,15 @@ class StepDecay(LRScheduler): scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.5, step_size=5, gamma=0.8, verbose=True) sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) for epoch in range(20): - for batch_id in range(2): + for batch_id in range(5): x = paddle.uniform([10, 10]) out = linear(x) loss = paddle.mean(out) loss.backward() sgd.step() sgd.clear_gradients() - scheduler.step() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch # train on static graph mode paddle.enable_static() @@ -1033,7 +1050,7 @@ class StepDecay(LRScheduler): exe = paddle.static.Executor() exe.run(start_prog) for epoch in range(20): - for batch_id in range(2): + for batch_id in range(5): out = exe.run( main_prog, feed={ @@ -1041,7 +1058,8 @@ class StepDecay(LRScheduler): 'y': np.random.randn(3, 4, 5).astype('float32') }, fetch_list=loss.name) - scheduler.step() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch """ def __init__(self, @@ -1102,14 +1120,15 @@ class LambdaDecay(LRScheduler): scheduler = paddle.optimizer.lr.LambdaDecay(learning_rate=0.5, lr_lambda=lambda x:0.95**x, verbose=True) sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) for epoch in range(20): - for batch_id in range(2): + for batch_id in range(5): x = paddle.uniform([10, 10]) out = linear(x) loss = paddle.mean(out) loss.backward() sgd.step() sgd.clear_gradients() - scheduler.step() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch # train on static graph mode paddle.enable_static() @@ -1127,7 +1146,7 @@ class LambdaDecay(LRScheduler): exe = paddle.static.Executor() exe.run(start_prog) for epoch in range(20): - for batch_id in range(2): + for batch_id in range(5): out = exe.run( main_prog, feed={ @@ -1135,7 +1154,8 @@ class LambdaDecay(LRScheduler): 'y': np.random.randn(3, 4, 5).astype('float32') }, fetch_list=loss.name) - scheduler.step() + scheduler.step() # If you update learning rate each step + # scheduler.step() # If you update learning rate each epoch """ @@ -1200,14 +1220,15 @@ class ReduceOnPlateau(LRScheduler): scheduler = paddle.optimizer.lr.ReduceOnPlateau(learning_rate=1.0, factor=0.5, patience=5, verbose=True) sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) for epoch in range(20): - for batch_id in range(2): + for batch_id in range(5): x = paddle.uniform([10, 10]) out = linear(x) loss = paddle.mean(out) loss.backward() sgd.step() sgd.clear_gradients() - scheduler.step(loss) + scheduler.step(loss) # If you update learning rate each step + # scheduler.step(loss) # If you update learning rate each epoch # train on static graph mode paddle.enable_static() @@ -1225,7 +1246,7 @@ class ReduceOnPlateau(LRScheduler): exe = paddle.static.Executor() exe.run(start_prog) for epoch in range(20): - for batch_id in range(2): + for batch_id in range(5): out = exe.run( main_prog, feed={ @@ -1233,7 +1254,8 @@ class ReduceOnPlateau(LRScheduler): 'y': np.random.randn(3, 4, 5).astype('float32') }, fetch_list=loss.name) - scheduler.step(out[0]) + scheduler.step(out[0]) # If you update learning rate each step + # scheduler.step(out[0]) # If you update learning rate each epoch """ @@ -1268,7 +1290,6 @@ def __init__(self, "The type of 'learning_rate' in 'ReduceOnPlateau' must be 'float', but received %s." % type(learning_rate)) - self.verbose = verbose self.patience = patience self.threshold = threshold self.threshold_mode = threshold_mode @@ -1406,7 +1427,7 @@ class CosineAnnealingDecay(LRScheduler): scheduler = paddle.optimizer.lr.CosineAnnealingDecay(learning_rate=0.5, T_max=10, verbose=True) sgd = paddle.optimizer.SGD(learning_rate=scheduler, parameters=linear.parameters()) for epoch in range(20): - for batch_id in range(2): + for batch_id in range(5): x = paddle.uniform([10, 10]) out = linear(x) loss = paddle.mean(out) @@ -1431,7 +1452,7 @@ class CosineAnnealingDecay(LRScheduler): exe = paddle.static.Executor() exe.run(start_prog) for epoch in range(20): - for batch_id in range(2): + for batch_id in range(5): out = exe.run( main_prog, feed={