@@ -190,54 +190,47 @@ def test_adamw_op_dygraph(self):
 
     def test_adamw_op(self):
         paddle.enable_static()
-        places = [fluid.CPUPlace(), fluid.CUDAPlace(0)]
-        for place in places:
-            train_prog = fluid.Program()
-            startup = fluid.Program()
-            with fluid.program_guard(train_prog, startup):
-                with fluid.unique_name.guard():
-                    x = fluid.data(name='x', shape=[None, 10], dtype='float32')
-                    y = fluid.data(name='y', shape=[None, 1], dtype='float32')
-
-                    fc1 = fluid.layers.fc(input=x, size=32, act=None)
-                    prediction = fluid.layers.fc(input=fc1, size=1, act=None)
-                    cost = fluid.layers.square_error_cost(
-                        input=prediction, label=y)
-                    avg_cost = fluid.layers.mean(cost)
-
-                    simple_lr_fun = partial(
-                        simple_lr_setting, decay_rate=0.8, n_layers=2)
-
-                    beta1 = fluid.layers.create_global_var(
-                        shape=[1],
-                        value=0.85,
-                        dtype='float32',
-                        persistable=True)
-                    beta2 = fluid.layers.create_global_var(
-                        shape=[1],
-                        value=0.95,
-                        dtype='float32',
-                        persistable=True)
-                    betas = [beta1, beta2]
-                    opt = paddle.optimizer.AdamW(
-                        learning_rate=1e-5,
-                        beta1=beta1,
-                        beta2=beta2,
-                        weight_decay=0.01,
-                        epsilon=1e-8,
-                        lr_ratio=simple_lr_fun)
-                    opt.minimize(avg_cost)
-
-            exe = fluid.Executor(place)
-            exe.run(startup)
-            for _ in range(2):
-                inputs = np.random.random(size=[8, 10]).astype('float32')
-                outputs = np.random.random(size=[8, 1]).astype('float32')
-                rets = exe.run(train_prog,
-                               feed={"x": inputs,
-                                     "y": outputs},
-                               fetch_list=[avg_cost])
-                assert rets[0] is not None
+        place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() \
+            else fluid.CPUPlace()
+        train_prog = fluid.Program()
+        startup = fluid.Program()
+        with fluid.program_guard(train_prog, startup):
+            with fluid.unique_name.guard():
+                x = fluid.data(name='x', shape=[None, 10], dtype='float32')
+                y = fluid.data(name='y', shape=[None, 1], dtype='float32')
+
+                fc1 = fluid.layers.fc(input=x, size=32, act=None)
+                prediction = fluid.layers.fc(input=fc1, size=1, act=None)
+                cost = fluid.layers.square_error_cost(input=prediction, label=y)
+                avg_cost = fluid.layers.mean(cost)
+
+                simple_lr_fun = partial(
+                    simple_lr_setting, decay_rate=0.8, n_layers=2)
+
+                beta1 = fluid.layers.create_global_var(
+                    shape=[1], value=0.85, dtype='float32', persistable=True)
+                beta2 = fluid.layers.create_global_var(
+                    shape=[1], value=0.95, dtype='float32', persistable=True)
+                betas = [beta1, beta2]
+                opt = paddle.optimizer.AdamW(
+                    learning_rate=1e-5,
+                    beta1=beta1,
+                    beta2=beta2,
+                    weight_decay=0.01,
+                    epsilon=1e-8,
+                    lr_ratio=simple_lr_fun)
+                opt.minimize(avg_cost)
+
+        exe = fluid.Executor(place)
+        exe.run(startup)
+        for _ in range(2):
+            inputs = np.random.random(size=[8, 10]).astype('float32')
+            outputs = np.random.random(size=[8, 1]).astype('float32')
+            rets = exe.run(train_prog,
+                           feed={"x": inputs,
+                                 "y": outputs},
+                           fetch_list=[avg_cost])
+            assert rets[0] is not None
 
         paddle.disable_static()
 
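For readers skimming this hunk: `lr_ratio` is the layerwise learning-rate hook of `paddle.optimizer.AdamW`. The optimizer calls it once per parameter and multiplies the base learning rate by the returned float, which is how this test exercises layerwise decay. The real `simple_lr_setting` is defined elsewhere in this test file; the sketch below is only an assumption about its shape, and the name-parsing rule is invented for illustration.

# Hedged sketch of a layerwise lr_ratio helper in the style of the
# simple_lr_setting partial used above. The depth-from-name rule is
# hypothetical; the real helper lives elsewhere in test_adamw_op.py.
from collections import namedtuple
from functools import partial

# Stand-in for a Paddle Parameter, which exposes a .name attribute.
FakeParam = namedtuple("FakeParam", ["name"])

def simple_lr_setting(param, decay_rate, n_layers):
    # Infer a layer index from the parameter name, e.g. "fc_1.w_0" -> 1.
    # Parameters that do not match are treated as sitting at the top.
    if param.name.startswith("fc_"):
        depth = int(param.name.split("_")[1].split(".")[0]) + 1
    else:
        depth = n_layers + 1
    # Deeper layers keep more of the base learning rate:
    # ratio = decay_rate ** (distance from the top of the network).
    return decay_rate ** (n_layers + 1 - depth)

simple_lr_fun = partial(simple_lr_setting, decay_rate=0.8, n_layers=2)
assert abs(simple_lr_fun(FakeParam("fc_0.w_0")) - 0.64) < 1e-9
assert abs(simple_lr_fun(FakeParam("fc_1.b_0")) - 0.8) < 1e-9

With decay_rate=0.8 and n_layers=2 as in the partial above, a helper of this shape would scale the learning rate of fc_0 parameters by 0.64, fc_1 parameters by 0.8, and leave everything else at 1.0.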