diff --git a/libai/scheduler/__init__.py b/libai/scheduler/__init__.py
index 378d06d48..2fae52050 100644
--- a/libai/scheduler/__init__.py
+++ b/libai/scheduler/__init__.py
@@ -20,4 +20,5 @@
     WarmupExponentialLR,
     WarmupMultiStepLR,
     WarmupPolynomialLR,
+    WarmupStepLR,
 )
diff --git a/libai/scheduler/lr_scheduler.py b/libai/scheduler/lr_scheduler.py
index 8d7f56df0..3f32a813f 100644
--- a/libai/scheduler/lr_scheduler.py
+++ b/libai/scheduler/lr_scheduler.py
@@ -98,6 +98,42 @@ def WarmupCosineAnnealingLR(
     return warmup_cosine_annealing_lr
 
 
+def WarmupStepLR(
+    optimizer: flow.optim.Optimizer,
+    max_iter: int,
+    warmup_factor: float,
+    warmup_iter: int,
+    step_size: int,
+    gamma: float = 0.1,
+    warmup_method: str = "linear",
+):
+    """Create a schedule with a learning rate that decreases following the values of the step
+    function from the initial lr set in the optimizer to 0, after a warmup period during which
+    it increases linearly between 0 and the initial lr set in the optimizer.
+    Args:
+        optimizer (flow.optim.Optimizer): Wrapped optimizer.
+        max_iter (int): Total training iters.
+        warmup_factor (float): The warmup factor.
+        warmup_iter (int): The number of warmup steps.
+        step_size (int): Period of learning rate decay.
+        gamma (float, optional): Multiplicative factor of learning rate decay. Defaults to 0.1.
+        warmup_method (str, optional): The method of warmup, you can choose "linear" or "constant".
+            In linear mode, the multiplication factor starts with warmup_factor in the first
+            epoch and then increases linearly to reach 1. Defaults to "linear".
+    """
+    step_lr = flow.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)
+    if warmup_iter == 0:
+        logger.warning("warmup iters equals to zero, return StepLR")
+        return step_lr
+    warmup_step_lr = flow.optim.lr_scheduler.WarmUpLR(
+        step_lr,
+        warmup_factor=warmup_factor,
+        warmup_iters=warmup_iter,
+        warmup_method=warmup_method,
+    )
+    return warmup_step_lr
+
+
 def WarmupMultiStepLR(
     optimizer: flow.optim.Optimizer,
     max_iter: int,
@@ -203,7 +239,11 @@ def WarmupPolynomialLR(
             epoch and then inreases linearly to reach 1. Defaults to "linear".
""" polynomial_lr = flow.optim.lr_scheduler.PolynomialLR( - optimizer, steps=max_iter, end_learning_rate=end_learning_rate, power=power, cycle=cycle + optimizer, + decay_batch=max_iter, + end_learning_rate=end_learning_rate, + power=power, + cycle=cycle, ) if warmup_iter == 0: logger.warning("warmup iters equals to zero, return PolynomialLR") diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py index cc46948ad..dceae7459 100644 --- a/tests/test_scheduler.py +++ b/tests/test_scheduler.py @@ -26,10 +26,11 @@ WarmupExponentialLR, WarmupMultiStepLR, WarmupPolynomialLR, + WarmupStepLR, ) -@unittest.skip("Bugs in warmup scheduler") +# @unittest.skip("Bugs in warmup scheduler") class TestScheduler(TestCase): def test_warmup_multistep(self): p = nn.Parameter(flow.zeros(0)) @@ -58,6 +59,33 @@ def test_warmup_multistep(self): self.assertTrue(np.allclose(lrs[15:20], 0.05)) self.assertTrue(np.allclose(lrs[20:], 0.005)) + def test_warmup_step(self): + p = nn.Parameter(flow.zeros(0)) + opt = flow.optim.SGD([p], lr=5.0) + + sched = WarmupStepLR( + optimizer=opt, + max_iter=10, + step_size=10, + gamma=0.1, + warmup_factor=0.001, + warmup_iter=5, + warmup_method="linear", + ) + + p.sum().backward() + opt.step() + + lrs = [0.005] + for _ in range(30): + sched.step() + lrs.append(opt.param_groups[0]["lr"]) + self.assertTrue(np.allclose(lrs[:5], [0.005, 1.004, 2.003, 3.002, 4.001])) + self.assertTrue(np.allclose(lrs[5:10], 5.0)) + self.assertTrue(np.allclose(lrs[10:20], 0.5)) + self.assertTrue(np.allclose(lrs[20:30], 0.05)) + self.assertTrue(np.allclose(lrs[30:], 0.005)) + def test_warmup_cosine(self): p = nn.Parameter(flow.zeros(0)) opt = flow.optim.SGD([p], lr=5.0) @@ -105,18 +133,22 @@ def test_warmup_exponential(self): def _get_exponential_lr(base_lr, gamma, max_iters, warmup_iters): valid_values = [] - for idx in range(max_iters - warmup_iters): + for idx in range(warmup_iters, max_iters + 1): valid_values.append(base_lr * (gamma ** idx)) return valid_values for _ in range(30): sched.step() lrs.append(opt.param_groups[0]["lr"]) - self.assertTrue(np.allclose(lrs[:5], [0.005, 1.004, 2.003, 3.002, 4.001])) + self.assertTrue( + np.allclose( + lrs[:5], [0.005, 0.00401, 0.0030199999999999997, 0.00203, 0.0010399999999999997] + ) + ) valid_intermediate_values = _get_exponential_lr( base_lr=5.0, gamma=0.1, max_iters=30, warmup_iters=5 ) - self.assertEqual(lrs[5:30], valid_intermediate_values) + self.assertEqual(lrs[5:], valid_intermediate_values) def test_warmup_polynomial(self): p = nn.Parameter(flow.zeros(0))