From aed12892285ac7aa4e05c3d1e53a72665629146c Mon Sep 17 00:00:00 2001 From: belerico Date: Thu, 16 Jan 2025 10:51:47 +0100 Subject: [PATCH 1/8] Update packages: numpy, opencv, lightning-utilities --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c89f0a500..9edfcad9d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,13 +33,13 @@ dependencies = [ "tensorboard>=2.10", "python-dotenv>=1.0.0", "lightning>=2.0", - "lightning-utilities<=0.9", + "lightning-utilities<=0.11.9", "hydra-core==1.3.0", "torchmetrics", "rich==13.5.*", - "opencv-python==4.8.0.*", + "opencv-python==4.10.*", "torch>=2.0,!=2.2.0", - "numpy<2.0" + "numpy>=2.0" ] dynamic = ["version"] From 176616e2e1c2ebc3ab5cba036e47fd0f14e84752 Mon Sep 17 00:00:00 2001 From: belerico Date: Thu, 16 Jan 2025 11:46:00 +0100 Subject: [PATCH 2/8] gymnasium 1.0.0 --- benchmarks/benchmark_sb3.py | 4 ++-- howto/select_observations.md | 6 +++--- pyproject.toml | 2 +- sheeprl/configs/exp/sac.yaml | 2 +- sheeprl/configs/exp/sac_benchmarks.yaml | 2 +- sheeprl/envs/dummy.py | 4 ++-- sheeprl/envs/wrappers.py | 2 +- sheeprl/utils/env.py | 20 +++++++++++++------- 8 files changed, 24 insertions(+), 18 deletions(-) diff --git a/benchmarks/benchmark_sb3.py b/benchmarks/benchmark_sb3.py index 5cf92b1ce..e2168b36c 100644 --- a/benchmarks/benchmark_sb3.py +++ b/benchmarks/benchmark_sb3.py @@ -27,13 +27,13 @@ # print(sb3.common.evaluation.evaluate_policy(model.policy, env)) -# Stable Baselines3 SAC - LunarLanderContinuous-v2 +# Stable Baselines3 SAC - LunarLanderContinuous-v3 # Decomment below to run SAC benchmarks # if __name__ == "__main__": # with timer("run_time", SumMetric, sync_on_compute=False): # env = sb3.common.vec_env.DummyVecEnv( -# [lambda: gym.make("LunarLanderContinuous-v2", render_mode="rgb_array") for _ in range(4)] +# [lambda: gym.make("LunarLanderContinuous-v3", render_mode="rgb_array") for _ in range(4)] # ) # model = SAC("MlpPolicy", 
env, verbose=0, device="cpu") # model.learn(total_timesteps=1024 * 64, log_interval=None) diff --git a/howto/select_observations.md b/howto/select_observations.md index 61a6188c6..7979f63b9 100644 --- a/howto/select_observations.md +++ b/howto/select_observations.md @@ -80,13 +80,13 @@ The algorithms that work with only vector observations are reported here: * SAC * Droq -For any of them you **must select** only the environments that provide vector observations. For instance, you can train the *SAC* algorithm on the `LunarLanderContinuous-v2` environment, but you cannot train it on the `CarRacing-v2` environment. +For any of them you **must select** only the environments that provide vector observations. For instance, you can train the *SAC* algorithm on the `LunarLanderContinuous-v3` environment, but you cannot train it on the `CarRacing-v3` environment. For these algorithms, you have to specify the *mlp* keys you want to encode. As usual, you have to specify them through the `mlp_keys.encoder` and `mlp_keys.decoder` arguments (in the command or the configs). 
-For instance, you can train a SAC agent on the `LunarLanderContinuous-v2` with the following command: +For instance, you can train a SAC agent on the `LunarLanderContinuous-v3` with the following command: ```bash -python sheeprl.py exp=sac env=gym env.id=LunarLanderContinuous-v2 algo.mlp_keys.encoder=[state] +python sheeprl.py exp=sac env=gym env.id=LunarLanderContinuous-v3 algo.mlp_keys.encoder=[state] ``` diff --git a/pyproject.toml b/pyproject.toml index 9edfcad9d..7315f1f5b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ readme = { file = "docs/README.md", content-type = "text/markdown" } requires-python = ">=3.8,<3.12" classifiers = ["Programming Language :: Python", "Topic :: Scientific/Engineering :: Artificial Intelligence"] dependencies = [ - "gymnasium==0.29.*", + "gymnasium==1.0.0", "pygame >=2.1.3", "moviepy>=1.0.3", "tensorboard>=2.10", diff --git a/sheeprl/configs/exp/sac.yaml b/sheeprl/configs/exp/sac.yaml index 505e87198..e496534c0 100644 --- a/sheeprl/configs/exp/sac.yaml +++ b/sheeprl/configs/exp/sac.yaml @@ -26,7 +26,7 @@ buffer: # Environment env: - id: LunarLanderContinuous-v2 + id: LunarLanderContinuous-v3 metric: aggregator: diff --git a/sheeprl/configs/exp/sac_benchmarks.yaml b/sheeprl/configs/exp/sac_benchmarks.yaml index 43e089457..097d16c26 100644 --- a/sheeprl/configs/exp/sac_benchmarks.yaml +++ b/sheeprl/configs/exp/sac_benchmarks.yaml @@ -10,7 +10,7 @@ run_benchmarks: True # Environment env: - id: LunarLanderContinuous-v2 + id: LunarLanderContinuous-v3 capture_video: False num_envs: 4 diff --git a/sheeprl/envs/dummy.py b/sheeprl/envs/dummy.py index fcaf74601..750ab17f0 100644 --- a/sheeprl/envs/dummy.py +++ b/sheeprl/envs/dummy.py @@ -18,7 +18,7 @@ def __init__( if self._dict_obs_space: self.observation_space = gym.spaces.Dict( { - "rgb": gym.spaces.Box(0, 256, shape=image_size, dtype=np.uint8), + "rgb": gym.spaces.Box(0, 255, shape=image_size, dtype=np.uint8), "state": gym.spaces.Box(-20, 20, shape=vector_shape, 
dtype=np.float32), } ) @@ -43,7 +43,7 @@ def get_obs(self) -> Dict[str, np.ndarray]: if self._dict_obs_space: return { # da sostituire con np.random.rand - "rgb": np.full(self.observation_space["rgb"].shape, self._current_step % 256, dtype=np.uint8), + "rgb": np.full(self.observation_space["rgb"].shape, self._current_step % 255, dtype=np.uint8), "state": np.full(self.observation_space["state"].shape, self._current_step, dtype=np.uint8), } else: diff --git a/sheeprl/envs/wrappers.py b/sheeprl/envs/wrappers.py index cc285b11b..0b042a1e3 100644 --- a/sheeprl/envs/wrappers.py +++ b/sheeprl/envs/wrappers.py @@ -25,7 +25,7 @@ class MaskVelocityWrapper(gym.ObservationWrapper): "MountainCarContinuous-v0": np.array([1]), "Pendulum-v1": np.array([2]), "LunarLander-v2": np.array([2, 3, 5]), - "LunarLanderContinuous-v2": np.array([2, 3, 5]), + "LunarLanderContinuous-v3": np.array([2, 3, 5]), } def __init__(self, env: gym.Env): diff --git a/sheeprl/utils/env.py b/sheeprl/utils/env.py index 750d85ee5..312f37cae 100644 --- a/sheeprl/utils/env.py +++ b/sheeprl/utils/env.py @@ -107,10 +107,14 @@ def thunk() -> gym.Env: f"is allowed in {cfg.env.id}, " f"only the first one is kept: {cfg.algo.cnn_keys.encoder[0]}" ) + obs_key = "state" if encoder_mlp_keys_length > 0: - gym.wrappers.pixel_observation.STATE_KEY = cfg.algo.mlp_keys.encoder[0] - env = gym.wrappers.PixelObservationWrapper( - env, pixels_only=encoder_mlp_keys_length == 0, pixel_keys=(cfg.algo.cnn_keys.encoder[0],) + obs_key = cfg.algo.mlp_keys.encoder[0] + env = gym.wrappers.AddRenderObservation( + env, + render_only=encoder_mlp_keys_length == 0, + render_key=cfg.algo.cnn_keys.encoder[0], + obs_key=obs_key, ) else: if encoder_mlp_keys_length > 1: @@ -120,7 +124,7 @@ def thunk() -> gym.Env: f"only the first one is kept: {cfg.algo.mlp_keys.encoder[0]}" ) mlp_key = cfg.algo.mlp_keys.encoder[0] - env = gym.wrappers.TransformObservation(env, lambda obs: {mlp_key: obs}) + env = gym.wrappers.TransformObservation(env, lambda obs: 
{mlp_key: obs}, None) env.observation_space = gym.spaces.Dict({mlp_key: env.observation_space}) elif isinstance(env.observation_space, gym.spaces.Box) and 2 <= len(env.observation_space.shape) <= 3: # Pixel only observation @@ -136,7 +140,9 @@ def thunk() -> gym.Env: "Please set at least one cnn key in the config file: `algo.cnn_keys.encoder=[your_cnn_key]`" ) cnn_key = cfg.algo.cnn_keys.encoder[0] - env = gym.wrappers.TransformObservation(env, lambda obs: {cnn_key: obs}) + env = gym.wrappers.TransformObservation( + env, lambda obs: {cnn_key: obs}, gym.spaces.Dict({cnn_key: env.observation_space}) + ) env.observation_space = gym.spaces.Dict({cnn_key: env.observation_space}) if ( @@ -195,7 +201,7 @@ def transform_obs(obs: Dict[str, Any]): return obs - env = gym.wrappers.TransformObservation(env, transform_obs) + env = gym.wrappers.TransformObservation(env, transform_obs, None) for k in cnn_keys: env.observation_space[k] = gym.spaces.Box( 0, 255, (1 if cfg.env.grayscale else 3, cfg.env.screen_size, cfg.env.screen_size), np.uint8 @@ -222,7 +228,7 @@ def transform_obs(obs: Dict[str, Any]): if cfg.env.capture_video and rank == 0 and vector_env_idx == 0 and run_name is not None: if cfg.env.grayscale: env = GrayscaleRenderWrapper(env) - env = gym.experimental.wrappers.RecordVideoV0( + env = gym.wrappers.RecordVideo( env, os.path.join(run_name, prefix + "_videos" if prefix else "videos"), disable_logger=True ) env.metadata["render_fps"] = env.frames_per_sec From 93f20ec42385077eab5345e8ab242c7d459c4469 Mon Sep 17 00:00:00 2001 From: belerico Date: Thu, 16 Jan 2025 11:51:39 +0100 Subject: [PATCH 3/8] Update packages --- pyproject.toml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7315f1f5b..4cba2f3e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,13 +74,13 @@ dev = [ "autoflake==2.2.1", "ruff==0.1.11", ] -mujoco = ["mujoco>=2.3.3", "gymnasium[mujoco]==0.29.*"] +mujoco = ["mujoco>=2.3.3", 
"gymnasium[mujoco]==1.0.0"] dmc = ["dm_control>=1.0.12"] -box2d = ["gymnasium[box2d]==0.29.*"] +box2d = ["gymnasium[box2d]==1.0.0"] atari = [ - "gymnasium[atari]==0.29.*", - "gymnasium[accept-rom-license]==0.29.*", - "gymnasium[other]==0.29.*", + "gymnasium[atari]==1.0.0", + "gymnasium[accept-rom-license]==1.0.0", + "gymnasium[other]==1.0.0", ] minedojo = ["minedojo==0.1", "importlib_resources==5.12.0", "gym==0.21.0"] minerl = ["setuptools==66.0.0", "minerl==0.4.4", "gym==0.19.0"] From 421cdce403ca72a8eeffd5e32304d3359eb53fbe Mon Sep 17 00:00:00 2001 From: belerico Date: Fri, 17 Jan 2025 09:58:46 +0100 Subject: [PATCH 4/8] Fix torch version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 4cba2f3e5..3d52785a5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ dependencies = [ "torchmetrics", "rich==13.5.*", "opencv-python==4.10.*", - "torch>=2.0,!=2.2.0", + "torch==2.3.1", "numpy>=2.0" ] dynamic = ["version"] From 81d413ec2168b841455c584799fae44c4fa8ee9e Mon Sep 17 00:00:00 2001 From: belerico Date: Fri, 17 Jan 2025 10:29:40 +0100 Subject: [PATCH 5/8] From np.testing_assert_allclose to np.allclose --- tests/test_data/test_buffers.py | 13 ++++++------- tests/test_data/test_sequential_buffer.py | 2 +- tests/test_envs/test_actions_as_observations.py | 2 +- tests/test_envs/test_frame_stack.py | 2 +- tests/test_envs/test_wrappers.py | 6 +++--- 5 files changed, 12 insertions(+), 13 deletions(-) diff --git a/tests/test_data/test_buffers.py b/tests/test_data/test_buffers.py index c5c069ccb..11457be65 100644 --- a/tests/test_data/test_buffers.py +++ b/tests/test_data/test_buffers.py @@ -34,7 +34,7 @@ def test_replay_buffer_add_single_td_not_full(): rb.add(td1) assert not rb.full assert rb._pos == 2 - np.testing.assert_allclose(rb["a"][:2], td1["a"]) + assert np.allclose(rb["a"][:2] , td1["a"]) def test_replay_buffer_add_tds(): @@ -51,7 +51,7 @@ def test_replay_buffer_add_tds(): assert 
rb["a"][0] == td3["a"][-2] assert rb["a"][1] == td3["a"][-1] assert rb._pos == 2 - np.testing.assert_allclose(rb["a"][2:4], td2["a"]) + assert np.allclose(rb["a"][2:4] , td2["a"]) def test_replay_buffer_add_tds_exceeding_buf_size_multiple_times(): @@ -68,7 +68,7 @@ def test_replay_buffer_add_tds_exceeding_buf_size_multiple_times(): assert rb.full assert rb._pos == 5 remainder = len(td3["a"]) % buf_size - np.testing.assert_allclose(rb["a"][: rb._pos], td3["a"][rb.buffer_size - rb._pos + remainder :]) + assert np.allclose(rb["a"][: rb._pos], td3["a"][rb.buffer_size - rb._pos + remainder :]) def test_replay_buffer_add_single_td_size_is_not_multiple(): @@ -80,9 +80,8 @@ def test_replay_buffer_add_single_td_size_is_not_multiple(): assert rb.full assert rb._pos == 2 remainder = len(td1["a"]) % buf_size - np.testing.assert_allclose(rb["a"][:remainder], td1["a"][-remainder:]) - np.testing.assert_allclose(rb["a"][remainder:], td1["a"][-buf_size:-remainder]) - + assert np.allclose(rb["a"][:remainder], td1["a"][-remainder:]) + assert np.allclose(rb["a"][remainder:], td1["a"][-buf_size:-remainder]) def test_replay_buffer_add_single_td_size_is_multiple(): buf_size = 5 @@ -92,7 +91,7 @@ def test_replay_buffer_add_single_td_size_is_multiple(): rb.add(td1) assert rb.full assert rb._pos == 0 - np.testing.assert_allclose(rb["a"], td1["a"][-buf_size:]) + assert np.allclose(rb["a"], td1["a"][-buf_size:]) def test_replay_buffer_add_replay_buffer(): diff --git a/tests/test_data/test_sequential_buffer.py b/tests/test_data/test_sequential_buffer.py index 95bf8a8b8..362a6219c 100644 --- a/tests/test_data/test_sequential_buffer.py +++ b/tests/test_data/test_sequential_buffer.py @@ -31,7 +31,7 @@ def test_seq_replay_buffer_add_tds(): assert rb.full assert rb["a"][0] == td3["a"][-2] assert rb["a"][1] == td3["a"][-1] - np.testing.assert_allclose(rb["a"][2:4], td2["a"]) + assert rb["a"][2] == td2["a"][-2] def test_seq_replay_buffer_add_single_td(): diff --git 
a/tests/test_envs/test_actions_as_observations.py b/tests/test_envs/test_actions_as_observations.py index 35ec9d36c..69110d2bc 100644 --- a/tests/test_envs/test_actions_as_observations.py +++ b/tests/test_envs/test_actions_as_observations.py @@ -62,7 +62,7 @@ def test_actions_as_observation_wrapper(env_id: str, num_stack, dilation): expected_actions_stack = list(expected_actions)[dilation - 1 :: dilation] expected_actions_stack = np.concatenate(expected_actions_stack, axis=-1).astype(np.float32) - np.testing.assert_array_equal(o["action_stack"], expected_actions_stack) + assert np.allclose(o["action_stack"], expected_actions_stack) @pytest.mark.parametrize("num_stack", [-1, 0]) diff --git a/tests/test_envs/test_frame_stack.py b/tests/test_envs/test_frame_stack.py index e7e3e825c..58e3a51f2 100644 --- a/tests/test_envs/test_frame_stack.py +++ b/tests/test_envs/test_frame_stack.py @@ -87,7 +87,7 @@ def test_framestack(num_stack, dilation): ], axis=0, ) - np.testing.assert_array_equal(obs["rgb"], expected_frame) + assert np.allclose(obs["rgb"], expected_frame) @pytest.mark.parametrize("env_id", ENVIRONMENTS.keys()) diff --git a/tests/test_envs/test_wrappers.py b/tests/test_envs/test_wrappers.py index 8189b573e..ce0b76bc6 100644 --- a/tests/test_envs/test_wrappers.py +++ b/tests/test_envs/test_wrappers.py @@ -14,7 +14,7 @@ def test_mask_velocities_fail(): with pytest.raises(NotImplementedError): - env = gym.make("CarRacing-v2") + env = gym.make("CarRacing-v3") env = MaskVelocityWrapper(env) @@ -48,7 +48,7 @@ def test_rewards_as_observation_wrapper_step_method(env_id, dict_obs_space): else: assert "obs" in obs assert "reward" in obs - np.testing.assert_array_equal(obs["reward"], np.array([0.0])) + assert np.allclose(obs["reward"], np.array([0.0])) @pytest.mark.parametrize("env_id", ENVIRONMENTS.keys()) @@ -65,7 +65,7 @@ def test_rewards_as_observation_wrapper_reset_method(env_id, dict_obs_space): else: assert "obs" in obs assert "reward" in obs - 
np.testing.assert_array_equal(obs["reward"], np.array([0.0])) + assert np.allclose(obs["reward"], np.array([0.0])) @pytest.mark.parametrize("amount", [-1.3, -1, 0]) From d290e02c748b4470a2d26bdff59f0c21736995e7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 17 Jan 2025 09:30:05 +0000 Subject: [PATCH 6/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_data/test_buffers.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_data/test_buffers.py b/tests/test_data/test_buffers.py index 11457be65..d3caee2aa 100644 --- a/tests/test_data/test_buffers.py +++ b/tests/test_data/test_buffers.py @@ -34,7 +34,7 @@ def test_replay_buffer_add_single_td_not_full(): rb.add(td1) assert not rb.full assert rb._pos == 2 - assert np.allclose(rb["a"][:2] , td1["a"]) + assert np.allclose(rb["a"][:2], td1["a"]) def test_replay_buffer_add_tds(): @@ -51,7 +51,7 @@ def test_replay_buffer_add_tds(): assert rb["a"][0] == td3["a"][-2] assert rb["a"][1] == td3["a"][-1] assert rb._pos == 2 - assert np.allclose(rb["a"][2:4] , td2["a"]) + assert np.allclose(rb["a"][2:4], td2["a"]) def test_replay_buffer_add_tds_exceeding_buf_size_multiple_times(): @@ -83,6 +83,7 @@ def test_replay_buffer_add_single_td_size_is_not_multiple(): assert np.allclose(rb["a"][:remainder], td1["a"][-remainder:]) assert np.allclose(rb["a"][remainder:], td1["a"][-buf_size:-remainder]) + def test_replay_buffer_add_single_td_size_is_multiple(): buf_size = 5 n_envs = 1 From c226fc6e9b80be12d2c52af094f1c91bfdd30568 Mon Sep 17 00:00:00 2001 From: belerico Date: Fri, 17 Jan 2025 15:13:12 +0100 Subject: [PATCH 7/8] Set np version for Winzoz and Linux --- pyproject.toml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3d52785a5..d0acd3a63 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,7 @@ 
requires-python = ">=3.8,<3.12" classifiers = ["Programming Language :: Python", "Topic :: Scientific/Engineering :: Artificial Intelligence"] dependencies = [ "gymnasium==1.0.0", - "pygame >=2.1.3", + "pygame>=2.1.3", "moviepy>=1.0.3", "tensorboard>=2.10", "python-dotenv>=1.0.0", @@ -39,7 +39,10 @@ dependencies = [ "rich==13.5.*", "opencv-python==4.10.*", "torch==2.3.1", - "numpy>=2.0" + # Windows only: + "numpy==1.26.0; sys_platform == 'win32'", + # Non-Windows (Linux, macOS, etc.): + "numpy>=2.0.0; sys_platform != 'win32'", ] dynamic = ["version"] From fad22bcad681893765ced1dbc558423ae2fd259f Mon Sep 17 00:00:00 2001 From: belerico Date: Tue, 21 Jan 2025 10:23:24 +0100 Subject: [PATCH 8/8] deprecated python3.8 --- .github/workflows/cpu-tests.yaml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cpu-tests.yaml b/.github/workflows/cpu-tests.yaml index 2cc88abf8..6c82a44bf 100644 --- a/.github/workflows/cpu-tests.yaml +++ b/.github/workflows/cpu-tests.yaml @@ -22,7 +22,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest] - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.9", "3.10", "3.11"] timeout-minutes: 20 diff --git a/pyproject.toml b/pyproject.toml index d0acd3a63..17e1ae2f3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ maintainers = [ keywords = ["reinforcement", "machine", "learning", "distributed", "production"] license = { file = "LICENSE" } readme = { file = "docs/README.md", content-type = "text/markdown" } -requires-python = ">=3.8,<3.12" +requires-python = ">=3.9,<3.12" classifiers = ["Programming Language :: Python", "Topic :: Scientific/Engineering :: Artificial Intelligence"] dependencies = [ "gymnasium==1.0.0",