3 changes: 2 additions & 1 deletion docs/misc/changelog.rst
@@ -14,6 +14,7 @@ New Features:
^^^^^^^^^^^^^
- Added ``unwrap_vec_wrapper()`` to ``common.vec_env`` to extract ``VecEnvWrapper`` if needed
- Added ``StopTrainingOnMaxEpisodes`` to callback collection (@xicocaio)
- Added ``device`` keyword argument to ``BaseAlgorithm.load()`` (@liorcohen5)

Bug Fixes:
^^^^^^^^^^
@@ -399,4 +400,4 @@ And all the contributors:
@MarvineGothic @jdossgollin @SyllogismRXS @rusu24edward @jbulow @Antymon @seheevic @justinkterry @edbeeching
@flodorner @KuKuXia @NeoExtended @PartiallyTyped @mmcenta @richardwu @kinalmehta @rolandgvc @tkelestemur @mloo3
@tirafesi @blurLake @koulakis @joeljosephjin @shwang @rk37 @andyshih12 @RaphaelWag @xicocaio
@diditforlulz273
@diditforlulz273 @liorcohen5
7 changes: 5 additions & 2 deletions stable_baselines3/common/base_class.py
@@ -316,13 +316,16 @@ def predict(
return self.policy.predict(observation, state, mask, deterministic)

@classmethod
def load(cls, load_path: str, env: Optional[GymEnv] = None, **kwargs) -> "BaseAlgorithm":
def load(
cls, load_path: str, env: Optional[GymEnv] = None, device: Union[th.device, str] = "auto", **kwargs
) -> "BaseAlgorithm":
"""
Load the model from a zip-file

:param load_path: the location of the saved data
:param env: the new environment to run the loaded model on
(can be None if you only need prediction from a trained model) has priority over any saved environment
:param device: (Union[th.device, str]) Device on which the code should run.
:param kwargs: extra arguments to change the model when loading
"""
data, params, tensors = load_from_zip_file(load_path)
@@ -352,7 +355,7 @@ def load(cls, load_path: str, env: Optional[GymEnv] = None, **kwargs) -> "BaseAl
model = cls(
policy=data["policy_class"],
env=env,
device="auto",
device=device,
_init_setup_model=False, # pytype: disable=not-instantiable,wrong-keyword-args
)

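To make the new keyword concrete, here is a minimal stdlib-only sketch of the fallback rule that ``utils.get_device`` applies; the function name ``resolve_device`` and the ``cuda_available`` flag are illustrative, not part of the library:

```python
def resolve_device(device: str, cuda_available: bool) -> str:
    """Sketch of the 'auto' device resolution discussed in this PR.

    'auto' means 'use cuda if a GPU is present'; requesting 'cuda'
    on a machine without a GPU silently falls back to the CPU.
    """
    if device == "auto":
        device = "cuda"
    if device == "cuda" and not cuda_available:
        return "cpu"
    return device
```

Under this rule, ``load(..., device="auto")`` on a CPU-only machine places the model on the CPU, which is the behavior the test changes below have to account for.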
27 changes: 16 additions & 11 deletions tests/test_save_load.py
@@ -70,21 +70,26 @@ def test_save_load(tmp_path, model_class):
# Check
model.save(tmp_path / "test_save.zip")
del model
model = model_class.load(str(tmp_path / "test_save.zip"), env=env)

# check if params are still the same after load
new_params = model.policy.state_dict()
# Check if the model loads as expected for every possible choice of device:
for device in ["auto", "cpu", "cuda"]:
@leor-c (Contributor, Author) commented on Sep 2, 2020:
I noticed that the git code comparison looks quite messy, so I'm elaborating on my changes here to ease the review process for you:
The actual change is the added ``for`` loop that iterates over all possible devices; at each iteration the device parameter is passed to the call to ``load`` (line 76). At the end of each iteration I delete the model (line 92) so it can be loaded cleanly in the next iteration.
Everything else is the same as before, i.e., I've reused the exact same test (inside the new ``for`` loop) to ensure proper loading, and tested all possible values of the new ``device`` argument.

(Member) replied:

It seems that you are actually not testing that the ``device`` parameter was successfully used.
Also, you should skip the ``cuda`` device if no GPU is available.

@leor-c (Contributor, Author) commented on Sep 2, 2020:

  1. You're right. I will work on improving the test.
  2. What should be the expected behavior when a user passes ``device='cuda'`` on a machine with no GPU?
    I noticed that the constructor silently falls back to the CPU in that case, without notifying the user.
    In any case, I think the test should cover all possible inputs while verifying that the outcome matches your expectations. Do you agree?

@leor-c (Contributor, Author) commented on Sep 2, 2020:

In my test I've used the ``utils.get_device()`` function (which is also used inside the constructor) to determine the expected device. This way, if, for example, the behavior of ``get_device`` changes, the test won't break.
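The skip the reviewer asks for can be kept out of the test body by computing the device list up front; a minimal sketch, where ``devices_to_test`` is a hypothetical helper rather than part of the test suite:

```python
def devices_to_test(cuda_available: bool) -> list:
    """Return the load() device choices worth testing on this machine.

    'cuda' is dropped when no GPU is present, as the reviewer suggested.
    """
    devices = ["auto", "cpu"]
    if cuda_available:
        devices.append("cuda")
    return devices
```

In the actual test this would be driven by the real availability check, e.g. ``for device in devices_to_test(th.cuda.is_available()):``.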

model = model_class.load(str(tmp_path / "test_save.zip"), env=env, device=device)

# Check that all params are the same as before save load procedure now
for key in params:
assert th.allclose(params[key], new_params[key]), "Model parameters not the same after save and load."
# check if params are still the same after load
new_params = model.policy.state_dict()

# check if model still selects the same actions
new_selected_actions, _ = model.predict(observations, deterministic=True)
assert np.allclose(selected_actions, new_selected_actions, 1e-4)
# Check that all params are the same as before save load procedure now
for key in params:
assert th.allclose(params[key], new_params[key]), "Model parameters not the same after save and load."

# check if learn still works
model.learn(total_timesteps=1000, eval_freq=500)
# check if model still selects the same actions
new_selected_actions, _ = model.predict(observations, deterministic=True)
assert np.allclose(selected_actions, new_selected_actions, 1e-4)

# check if learn still works
model.learn(total_timesteps=1000, eval_freq=500)

del model

# clear file from os
os.remove(tmp_path / "test_save.zip")
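The reviewer's first point, that the test never checks where the parameters actually landed, could be addressed with a small assertion helper. A sketch follows; the helper name is illustrative, and it assumes only that values expose a ``.device.type`` attribute, as PyTorch tensors do:

```python
def assert_all_on_device(state_dict, expected_type: str) -> None:
    """Fail if any parameter in the state dict is not on the expected device.

    Accepts any mapping from names to objects with a ``.device.type``
    attribute, so ``model.policy.state_dict()`` can be passed directly.
    """
    for name, tensor in state_dict.items():
        actual = tensor.device.type
        assert actual == expected_type, (
            f"{name} is on {actual}, expected {expected_type}"
        )
```

Inside the loop this could be called as ``assert_all_on_device(model.policy.state_dict(), get_device(device).type)``, reusing ``utils.get_device`` to compute the expectation, as the author proposes above.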