Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/cpu-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest]
python-version: ["3.8", "3.9", "3.10", "3.11"]
python-version: ["3.9", "3.10", "3.11"]

timeout-minutes: 20

Expand Down
4 changes: 2 additions & 2 deletions benchmarks/benchmark_sb3.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@
# print(sb3.common.evaluation.evaluate_policy(model.policy, env))


# Stable Baselines3 SAC - LunarLanderContinuous-v2
# Stable Baselines3 SAC - LunarLanderContinuous-v3
# Uncomment below to run SAC benchmarks

# if __name__ == "__main__":
# with timer("run_time", SumMetric, sync_on_compute=False):
# env = sb3.common.vec_env.DummyVecEnv(
# [lambda: gym.make("LunarLanderContinuous-v2", render_mode="rgb_array") for _ in range(4)]
# [lambda: gym.make("LunarLanderContinuous-v3", render_mode="rgb_array") for _ in range(4)]
# )
# model = SAC("MlpPolicy", env, verbose=0, device="cpu")
# model.learn(total_timesteps=1024 * 64, log_interval=None)
Expand Down
6 changes: 3 additions & 3 deletions howto/select_observations.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,13 @@ The algorithms that work with only vector observations are reported here:
* SAC
* Droq

For any of them you **must select** only the environments that provide vector observations. For instance, you can train the *SAC* algorithm on the `LunarLanderContinuous-v2` environment, but you cannot train it on the `CarRacing-v2` environment.
For any of them you **must select** only the environments that provide vector observations. For instance, you can train the *SAC* algorithm on the `LunarLanderContinuous-v3` environment, but you cannot train it on the `CarRacing-v3` environment.

For these algorithms, you have to specify the *mlp* keys you want to encode. As usual, you have to specify them through the `mlp_keys.encoder` and `mlp_keys.decoder` arguments (in the command or the configs).

For instance, you can train a SAC agent on the `LunarLanderContinuous-v2` with the following command:
For instance, you can train a SAC agent on the `LunarLanderContinuous-v3` with the following command:
```bash
python sheeprl.py exp=sac env=gym env.id=LunarLanderContinuous-v2 algo.mlp_keys.encoder=[state]
python sheeprl.py exp=sac env=gym env.id=LunarLanderContinuous-v3 algo.mlp_keys.encoder=[state]
```


Expand Down
27 changes: 15 additions & 12 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,25 @@ maintainers = [
keywords = ["reinforcement", "machine", "learning", "distributed", "production"]
license = { file = "LICENSE" }
readme = { file = "docs/README.md", content-type = "text/markdown" }
requires-python = ">=3.8,<3.12"
requires-python = ">=3.9,<3.12"
classifiers = ["Programming Language :: Python", "Topic :: Scientific/Engineering :: Artificial Intelligence"]
dependencies = [
"gymnasium==0.29.*",
"pygame >=2.1.3",
"gymnasium==1.0.0",
"pygame>=2.1.3",
"moviepy>=1.0.3",
"tensorboard>=2.10",
"python-dotenv>=1.0.0",
"lightning>=2.0",
"lightning-utilities<=0.9",
"lightning-utilities<=0.11.9",
"hydra-core==1.3.0",
"torchmetrics",
"rich==13.5.*",
"opencv-python==4.8.0.*",
"torch>=2.0,!=2.2.0",
"numpy<2.0"
"opencv-python==4.10.*",
"torch==2.3.1",
# Windows only:
"numpy==1.26.0; sys_platform == 'win32'",
# Non-Windows (Linux, macOS, etc.):
"numpy>=2.0.0; sys_platform != 'win32'",
]
dynamic = ["version"]

Expand Down Expand Up @@ -74,13 +77,13 @@ dev = [
"autoflake==2.2.1",
"ruff==0.1.11",
]
mujoco = ["mujoco>=2.3.3", "gymnasium[mujoco]==0.29.*"]
mujoco = ["mujoco>=2.3.3", "gymnasium[mujoco]==1.0.0"]
dmc = ["dm_control>=1.0.12"]
box2d = ["gymnasium[box2d]==0.29.*"]
box2d = ["gymnasium[box2d]==1.0.0"]
atari = [
"gymnasium[atari]==0.29.*",
"gymnasium[accept-rom-license]==0.29.*",
"gymnasium[other]==0.29.*",
"gymnasium[atari]==1.0.0",
"gymnasium[accept-rom-license]==1.0.0",
"gymnasium[other]==1.0.0",
]
minedojo = ["minedojo==0.1", "importlib_resources==5.12.0", "gym==0.21.0"]
minerl = ["setuptools==66.0.0", "minerl==0.4.4", "gym==0.19.0"]
Expand Down
2 changes: 1 addition & 1 deletion sheeprl/configs/exp/sac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ buffer:

# Environment
env:
id: LunarLanderContinuous-v2
id: LunarLanderContinuous-v3

metric:
aggregator:
Expand Down
2 changes: 1 addition & 1 deletion sheeprl/configs/exp/sac_benchmarks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ run_benchmarks: True

# Environment
env:
id: LunarLanderContinuous-v2
id: LunarLanderContinuous-v3
capture_video: False
num_envs: 4

Expand Down
4 changes: 2 additions & 2 deletions sheeprl/envs/dummy.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def __init__(
if self._dict_obs_space:
self.observation_space = gym.spaces.Dict(
{
"rgb": gym.spaces.Box(0, 256, shape=image_size, dtype=np.uint8),
"rgb": gym.spaces.Box(0, 255, shape=image_size, dtype=np.uint8),
"state": gym.spaces.Box(-20, 20, shape=vector_shape, dtype=np.float32),
}
)
Expand All @@ -43,7 +43,7 @@ def get_obs(self) -> Dict[str, np.ndarray]:
if self._dict_obs_space:
return {
# da sostituire con np.random.rand
"rgb": np.full(self.observation_space["rgb"].shape, self._current_step % 256, dtype=np.uint8),
"rgb": np.full(self.observation_space["rgb"].shape, self._current_step % 255, dtype=np.uint8),
"state": np.full(self.observation_space["state"].shape, self._current_step, dtype=np.uint8),
}
else:
Expand Down
2 changes: 1 addition & 1 deletion sheeprl/envs/wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class MaskVelocityWrapper(gym.ObservationWrapper):
"MountainCarContinuous-v0": np.array([1]),
"Pendulum-v1": np.array([2]),
"LunarLander-v2": np.array([2, 3, 5]),
"LunarLanderContinuous-v2": np.array([2, 3, 5]),
"LunarLanderContinuous-v3": np.array([2, 3, 5]),
}

def __init__(self, env: gym.Env):
Expand Down
20 changes: 13 additions & 7 deletions sheeprl/utils/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,14 @@ def thunk() -> gym.Env:
f"is allowed in {cfg.env.id}, "
f"only the first one is kept: {cfg.algo.cnn_keys.encoder[0]}"
)
obs_key = "state"
if encoder_mlp_keys_length > 0:
gym.wrappers.pixel_observation.STATE_KEY = cfg.algo.mlp_keys.encoder[0]
env = gym.wrappers.PixelObservationWrapper(
env, pixels_only=encoder_mlp_keys_length == 0, pixel_keys=(cfg.algo.cnn_keys.encoder[0],)
obs_key = cfg.algo.mlp_keys.encoder[0]
env = gym.wrappers.AddRenderObservation(
env,
render_only=encoder_mlp_keys_length == 0,
render_key=cfg.algo.cnn_keys.encoder[0],
obs_key=obs_key,
)
else:
if encoder_mlp_keys_length > 1:
Expand All @@ -120,7 +124,7 @@ def thunk() -> gym.Env:
f"only the first one is kept: {cfg.algo.mlp_keys.encoder[0]}"
)
mlp_key = cfg.algo.mlp_keys.encoder[0]
env = gym.wrappers.TransformObservation(env, lambda obs: {mlp_key: obs})
env = gym.wrappers.TransformObservation(env, lambda obs: {mlp_key: obs}, None)
env.observation_space = gym.spaces.Dict({mlp_key: env.observation_space})
elif isinstance(env.observation_space, gym.spaces.Box) and 2 <= len(env.observation_space.shape) <= 3:
# Pixel only observation
Expand All @@ -136,7 +140,9 @@ def thunk() -> gym.Env:
"Please set at least one cnn key in the config file: `algo.cnn_keys.encoder=[your_cnn_key]`"
)
cnn_key = cfg.algo.cnn_keys.encoder[0]
env = gym.wrappers.TransformObservation(env, lambda obs: {cnn_key: obs})
env = gym.wrappers.TransformObservation(
env, lambda obs: {cnn_key: obs}, gym.spaces.Dict({cnn_key: env.observation_space})
)
env.observation_space = gym.spaces.Dict({cnn_key: env.observation_space})

if (
Expand Down Expand Up @@ -195,7 +201,7 @@ def transform_obs(obs: Dict[str, Any]):

return obs

env = gym.wrappers.TransformObservation(env, transform_obs)
env = gym.wrappers.TransformObservation(env, transform_obs, None)
for k in cnn_keys:
env.observation_space[k] = gym.spaces.Box(
0, 255, (1 if cfg.env.grayscale else 3, cfg.env.screen_size, cfg.env.screen_size), np.uint8
Expand All @@ -222,7 +228,7 @@ def transform_obs(obs: Dict[str, Any]):
if cfg.env.capture_video and rank == 0 and vector_env_idx == 0 and run_name is not None:
if cfg.env.grayscale:
env = GrayscaleRenderWrapper(env)
env = gym.experimental.wrappers.RecordVideoV0(
env = gym.wrappers.RecordVideo(
env, os.path.join(run_name, prefix + "_videos" if prefix else "videos"), disable_logger=True
)
env.metadata["render_fps"] = env.frames_per_sec
Expand Down
12 changes: 6 additions & 6 deletions tests/test_data/test_buffers.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def test_replay_buffer_add_single_td_not_full():
rb.add(td1)
assert not rb.full
assert rb._pos == 2
np.testing.assert_allclose(rb["a"][:2], td1["a"])
assert np.allclose(rb["a"][:2], td1["a"])


def test_replay_buffer_add_tds():
Expand All @@ -51,7 +51,7 @@ def test_replay_buffer_add_tds():
assert rb["a"][0] == td3["a"][-2]
assert rb["a"][1] == td3["a"][-1]
assert rb._pos == 2
np.testing.assert_allclose(rb["a"][2:4], td2["a"])
assert np.allclose(rb["a"][2:4], td2["a"])


def test_replay_buffer_add_tds_exceeding_buf_size_multiple_times():
Expand All @@ -68,7 +68,7 @@ def test_replay_buffer_add_tds_exceeding_buf_size_multiple_times():
assert rb.full
assert rb._pos == 5
remainder = len(td3["a"]) % buf_size
np.testing.assert_allclose(rb["a"][: rb._pos], td3["a"][rb.buffer_size - rb._pos + remainder :])
assert np.allclose(rb["a"][: rb._pos], td3["a"][rb.buffer_size - rb._pos + remainder :])


def test_replay_buffer_add_single_td_size_is_not_multiple():
Expand All @@ -80,8 +80,8 @@ def test_replay_buffer_add_single_td_size_is_not_multiple():
assert rb.full
assert rb._pos == 2
remainder = len(td1["a"]) % buf_size
np.testing.assert_allclose(rb["a"][:remainder], td1["a"][-remainder:])
np.testing.assert_allclose(rb["a"][remainder:], td1["a"][-buf_size:-remainder])
assert np.allclose(rb["a"][:remainder], td1["a"][-remainder:])
assert np.allclose(rb["a"][remainder:], td1["a"][-buf_size:-remainder])


def test_replay_buffer_add_single_td_size_is_multiple():
Expand All @@ -92,7 +92,7 @@ def test_replay_buffer_add_single_td_size_is_multiple():
rb.add(td1)
assert rb.full
assert rb._pos == 0
np.testing.assert_allclose(rb["a"], td1["a"][-buf_size:])
assert np.allclose(rb["a"], td1["a"][-buf_size:])


def test_replay_buffer_add_replay_buffer():
Expand Down
2 changes: 1 addition & 1 deletion tests/test_data/test_sequential_buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def test_seq_replay_buffer_add_tds():
assert rb.full
assert rb["a"][0] == td3["a"][-2]
assert rb["a"][1] == td3["a"][-1]
np.testing.assert_allclose(rb["a"][2:4], td2["a"])
assert rb["a"][2] == td2["a"][-2]


def test_seq_replay_buffer_add_single_td():
Expand Down
2 changes: 1 addition & 1 deletion tests/test_envs/test_actions_as_observations.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def test_actions_as_observation_wrapper(env_id: str, num_stack, dilation):
expected_actions_stack = list(expected_actions)[dilation - 1 :: dilation]
expected_actions_stack = np.concatenate(expected_actions_stack, axis=-1).astype(np.float32)

np.testing.assert_array_equal(o["action_stack"], expected_actions_stack)
assert np.allclose(o["action_stack"], expected_actions_stack)


@pytest.mark.parametrize("num_stack", [-1, 0])
Expand Down
2 changes: 1 addition & 1 deletion tests/test_envs/test_frame_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def test_framestack(num_stack, dilation):
],
axis=0,
)
np.testing.assert_array_equal(obs["rgb"], expected_frame)
assert np.allclose(obs["rgb"], expected_frame)


@pytest.mark.parametrize("env_id", ENVIRONMENTS.keys())
Expand Down
6 changes: 3 additions & 3 deletions tests/test_envs/test_wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

def test_mask_velocities_fail():
with pytest.raises(NotImplementedError):
env = gym.make("CarRacing-v2")
env = gym.make("CarRacing-v3")
env = MaskVelocityWrapper(env)


Expand Down Expand Up @@ -48,7 +48,7 @@ def test_rewards_as_observation_wrapper_step_method(env_id, dict_obs_space):
else:
assert "obs" in obs
assert "reward" in obs
np.testing.assert_array_equal(obs["reward"], np.array([0.0]))
assert np.allclose(obs["reward"], np.array([0.0]))


@pytest.mark.parametrize("env_id", ENVIRONMENTS.keys())
Expand All @@ -65,7 +65,7 @@ def test_rewards_as_observation_wrapper_reset_method(env_id, dict_obs_space):
else:
assert "obs" in obs
assert "reward" in obs
np.testing.assert_array_equal(obs["reward"], np.array([0.0]))
assert np.allclose(obs["reward"], np.array([0.0]))


@pytest.mark.parametrize("amount", [-1.3, -1, 0])
Expand Down
Loading