Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/cpu-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest]
python-version: ["3.8", "3.9", "3.10", "3.11"]
python-version: ["3.9", "3.10", "3.11"]

timeout-minutes: 20

Expand Down
4 changes: 2 additions & 2 deletions benchmarks/benchmark_sb3.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@
# print(sb3.common.evaluation.evaluate_policy(model.policy, env))


# Stable Baselines3 SAC - LunarLanderContinuous-v2
# Stable Baselines3 SAC - LunarLanderContinuous-v3
# Uncomment below to run SAC benchmarks

# if __name__ == "__main__":
# with timer("run_time", SumMetric, sync_on_compute=False):
# env = sb3.common.vec_env.DummyVecEnv(
# [lambda: gym.make("LunarLanderContinuous-v2", render_mode="rgb_array") for _ in range(4)]
# [lambda: gym.make("LunarLanderContinuous-v3", render_mode="rgb_array") for _ in range(4)]
# )
# model = SAC("MlpPolicy", env, verbose=0, device="cpu")
# model.learn(total_timesteps=1024 * 64, log_interval=None)
Expand Down
6 changes: 3 additions & 3 deletions howto/select_observations.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,13 @@ The algorithms that work with only vector observations are reported here:
* SAC
* Droq

For any of them you **must select** only the environments that provide vector observations. For instance, you can train the *SAC* algorithm on the `LunarLanderContinuous-v2` environment, but you cannot train it on the `CarRacing-v2` environment.
For any of them you **must select** only the environments that provide vector observations. For instance, you can train the *SAC* algorithm on the `LunarLanderContinuous-v3` environment, but you cannot train it on the `CarRacing-v3` environment.

For these algorithms, you have to specify the *mlp* keys you want to encode. As usual, you have to specify them through the `mlp_keys.encoder` and `mlp_keys.decoder` arguments (in the command or the configs).

For instance, you can train a SAC agent on the `LunarLanderContinuous-v2` with the following command:
For instance, you can train a SAC agent on the `LunarLanderContinuous-v3` with the following command:
```bash
python sheeprl.py exp=sac env=gym env.id=LunarLanderContinuous-v2 algo.mlp_keys.encoder=[state]
python sheeprl.py exp=sac env=gym env.id=LunarLanderContinuous-v3 algo.mlp_keys.encoder=[state]
```


Expand Down
27 changes: 15 additions & 12 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,25 @@ maintainers = [
keywords = ["reinforcement", "machine", "learning", "distributed", "production"]
license = { file = "LICENSE" }
readme = { file = "docs/README.md", content-type = "text/markdown" }
requires-python = ">=3.8,<3.12"
requires-python = ">=3.9,<3.12"
classifiers = ["Programming Language :: Python", "Topic :: Scientific/Engineering :: Artificial Intelligence"]
dependencies = [
"gymnasium==0.29.*",
"pygame >=2.1.3",
"gymnasium==1.0.0",
"pygame>=2.1.3",
"moviepy>=1.0.3",
"tensorboard>=2.10",
"python-dotenv>=1.0.0",
"lightning>=2.0",
"lightning-utilities<=0.9",
"lightning-utilities<=0.11.9",
"hydra-core==1.3.0",
"torchmetrics",
"rich==13.5.*",
"opencv-python==4.8.0.*",
"torch>=2.0,!=2.2.0",
"numpy<2.0"
"opencv-python==4.10.*",
"torch==2.3.1",
# Windows only:
"numpy==1.26.0; sys_platform == 'win32'",
# Non-Windows (Linux, macOS, etc.):
"numpy>=2.0.0; sys_platform != 'win32'",
]
dynamic = ["version"]

Expand Down Expand Up @@ -74,13 +77,13 @@ dev = [
"autoflake==2.2.1",
"ruff==0.1.11",
]
mujoco = ["mujoco>=2.3.3", "gymnasium[mujoco]==0.29.*"]
mujoco = ["mujoco>=2.3.3", "gymnasium[mujoco]==1.0.0"]
dmc = ["dm_control>=1.0.12"]
box2d = ["gymnasium[box2d]==0.29.*"]
box2d = ["gymnasium[box2d]==1.0.0"]
atari = [
"gymnasium[atari]==0.29.*",
"gymnasium[accept-rom-license]==0.29.*",
"gymnasium[other]==0.29.*",
"gymnasium[atari]==1.0.0",
"gymnasium[accept-rom-license]==1.0.0",
"gymnasium[other]==1.0.0",
]
minedojo = ["minedojo==0.1", "importlib_resources==5.12.0", "gym==0.21.0"]
minerl = ["setuptools==66.0.0", "minerl==0.4.4", "gym==0.19.0"]
Expand Down
2 changes: 1 addition & 1 deletion sheeprl/configs/exp/sac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ buffer:

# Environment
env:
id: LunarLanderContinuous-v2
id: LunarLanderContinuous-v3

metric:
aggregator:
Expand Down
2 changes: 1 addition & 1 deletion sheeprl/configs/exp/sac_benchmarks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ run_benchmarks: True

# Environment
env:
id: LunarLanderContinuous-v2
id: LunarLanderContinuous-v3
capture_video: False
num_envs: 4

Expand Down
4 changes: 2 additions & 2 deletions sheeprl/envs/dummy.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def __init__(
if self._dict_obs_space:
self.observation_space = gym.spaces.Dict(
{
"rgb": gym.spaces.Box(0, 256, shape=image_size, dtype=np.uint8),
"rgb": gym.spaces.Box(0, 255, shape=image_size, dtype=np.uint8),
"state": gym.spaces.Box(-20, 20, shape=vector_shape, dtype=np.float32),
}
)
Expand All @@ -43,7 +43,7 @@ def get_obs(self) -> Dict[str, np.ndarray]:
if self._dict_obs_space:
return {
# da sostituire con np.random.rand
"rgb": np.full(self.observation_space["rgb"].shape, self._current_step % 256, dtype=np.uint8),
"rgb": np.full(self.observation_space["rgb"].shape, self._current_step % 255, dtype=np.uint8),
"state": np.full(self.observation_space["state"].shape, self._current_step, dtype=np.uint8),
}
else:
Expand Down
2 changes: 1 addition & 1 deletion sheeprl/envs/wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class MaskVelocityWrapper(gym.ObservationWrapper):
"MountainCarContinuous-v0": np.array([1]),
"Pendulum-v1": np.array([2]),
"LunarLander-v2": np.array([2, 3, 5]),
"LunarLanderContinuous-v2": np.array([2, 3, 5]),
"LunarLanderContinuous-v3": np.array([2, 3, 5]),
}

def __init__(self, env: gym.Env):
Expand Down
20 changes: 13 additions & 7 deletions sheeprl/utils/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,14 @@ def thunk() -> gym.Env:
f"is allowed in {cfg.env.id}, "
f"only the first one is kept: {cfg.algo.cnn_keys.encoder[0]}"
)
obs_key = "state"
if encoder_mlp_keys_length > 0:
gym.wrappers.pixel_observation.STATE_KEY = cfg.algo.mlp_keys.encoder[0]
env = gym.wrappers.PixelObservationWrapper(
env, pixels_only=encoder_mlp_keys_length == 0, pixel_keys=(cfg.algo.cnn_keys.encoder[0],)
obs_key = cfg.algo.mlp_keys.encoder[0]
env = gym.wrappers.AddRenderObservation(
env,
render_only=encoder_mlp_keys_length == 0,
render_key=cfg.algo.cnn_keys.encoder[0],
obs_key=obs_key,
)
else:
if encoder_mlp_keys_length > 1:
Expand All @@ -120,7 +124,7 @@ def thunk() -> gym.Env:
f"only the first one is kept: {cfg.algo.mlp_keys.encoder[0]}"
)
mlp_key = cfg.algo.mlp_keys.encoder[0]
env = gym.wrappers.TransformObservation(env, lambda obs: {mlp_key: obs})
env = gym.wrappers.TransformObservation(env, lambda obs: {mlp_key: obs}, None)
env.observation_space = gym.spaces.Dict({mlp_key: env.observation_space})
elif isinstance(env.observation_space, gym.spaces.Box) and 2 <= len(env.observation_space.shape) <= 3:
# Pixel only observation
Expand All @@ -136,7 +140,9 @@ def thunk() -> gym.Env:
"Please set at least one cnn key in the config file: `algo.cnn_keys.encoder=[your_cnn_key]`"
)
cnn_key = cfg.algo.cnn_keys.encoder[0]
env = gym.wrappers.TransformObservation(env, lambda obs: {cnn_key: obs})
env = gym.wrappers.TransformObservation(
env, lambda obs: {cnn_key: obs}, gym.spaces.Dict({cnn_key: env.observation_space})
)
env.observation_space = gym.spaces.Dict({cnn_key: env.observation_space})

if (
Expand Down Expand Up @@ -195,7 +201,7 @@ def transform_obs(obs: Dict[str, Any]):

return obs

env = gym.wrappers.TransformObservation(env, transform_obs)
env = gym.wrappers.TransformObservation(env, transform_obs, None)
for k in cnn_keys:
env.observation_space[k] = gym.spaces.Box(
0, 255, (1 if cfg.env.grayscale else 3, cfg.env.screen_size, cfg.env.screen_size), np.uint8
Expand All @@ -222,7 +228,7 @@ def transform_obs(obs: Dict[str, Any]):
if cfg.env.capture_video and rank == 0 and vector_env_idx == 0 and run_name is not None:
if cfg.env.grayscale:
env = GrayscaleRenderWrapper(env)
env = gym.experimental.wrappers.RecordVideoV0(
env = gym.wrappers.RecordVideo(
env, os.path.join(run_name, prefix + "_videos" if prefix else "videos"), disable_logger=True
)
env.metadata["render_fps"] = env.frames_per_sec
Expand Down
12 changes: 6 additions & 6 deletions tests/test_data/test_buffers.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def test_replay_buffer_add_single_td_not_full():
rb.add(td1)
assert not rb.full
assert rb._pos == 2
np.testing.assert_allclose(rb["a"][:2], td1["a"])
assert np.allclose(rb["a"][:2], td1["a"])


def test_replay_buffer_add_tds():
Expand All @@ -51,7 +51,7 @@ def test_replay_buffer_add_tds():
assert rb["a"][0] == td3["a"][-2]
assert rb["a"][1] == td3["a"][-1]
assert rb._pos == 2
np.testing.assert_allclose(rb["a"][2:4], td2["a"])
assert np.allclose(rb["a"][2:4], td2["a"])


def test_replay_buffer_add_tds_exceeding_buf_size_multiple_times():
Expand All @@ -68,7 +68,7 @@ def test_replay_buffer_add_tds_exceeding_buf_size_multiple_times():
assert rb.full
assert rb._pos == 5
remainder = len(td3["a"]) % buf_size
np.testing.assert_allclose(rb["a"][: rb._pos], td3["a"][rb.buffer_size - rb._pos + remainder :])
assert np.allclose(rb["a"][: rb._pos], td3["a"][rb.buffer_size - rb._pos + remainder :])


def test_replay_buffer_add_single_td_size_is_not_multiple():
Expand All @@ -80,8 +80,8 @@ def test_replay_buffer_add_single_td_size_is_not_multiple():
assert rb.full
assert rb._pos == 2
remainder = len(td1["a"]) % buf_size
np.testing.assert_allclose(rb["a"][:remainder], td1["a"][-remainder:])
np.testing.assert_allclose(rb["a"][remainder:], td1["a"][-buf_size:-remainder])
assert np.allclose(rb["a"][:remainder], td1["a"][-remainder:])
assert np.allclose(rb["a"][remainder:], td1["a"][-buf_size:-remainder])


def test_replay_buffer_add_single_td_size_is_multiple():
Expand All @@ -92,7 +92,7 @@ def test_replay_buffer_add_single_td_size_is_multiple():
rb.add(td1)
assert rb.full
assert rb._pos == 0
np.testing.assert_allclose(rb["a"], td1["a"][-buf_size:])
assert np.allclose(rb["a"], td1["a"][-buf_size:])


def test_replay_buffer_add_replay_buffer():
Expand Down
2 changes: 1 addition & 1 deletion tests/test_data/test_sequential_buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def test_seq_replay_buffer_add_tds():
assert rb.full
assert rb["a"][0] == td3["a"][-2]
assert rb["a"][1] == td3["a"][-1]
np.testing.assert_allclose(rb["a"][2:4], td2["a"])
assert rb["a"][2] == td2["a"][-2]


def test_seq_replay_buffer_add_single_td():
Expand Down
2 changes: 1 addition & 1 deletion tests/test_envs/test_actions_as_observations.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def test_actions_as_observation_wrapper(env_id: str, num_stack, dilation):
expected_actions_stack = list(expected_actions)[dilation - 1 :: dilation]
expected_actions_stack = np.concatenate(expected_actions_stack, axis=-1).astype(np.float32)

np.testing.assert_array_equal(o["action_stack"], expected_actions_stack)
assert np.allclose(o["action_stack"], expected_actions_stack)


@pytest.mark.parametrize("num_stack", [-1, 0])
Expand Down
2 changes: 1 addition & 1 deletion tests/test_envs/test_frame_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def test_framestack(num_stack, dilation):
],
axis=0,
)
np.testing.assert_array_equal(obs["rgb"], expected_frame)
assert np.allclose(obs["rgb"], expected_frame)


@pytest.mark.parametrize("env_id", ENVIRONMENTS.keys())
Expand Down
6 changes: 3 additions & 3 deletions tests/test_envs/test_wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

def test_mask_velocities_fail():
with pytest.raises(NotImplementedError):
env = gym.make("CarRacing-v2")
env = gym.make("CarRacing-v3")
env = MaskVelocityWrapper(env)


Expand Down Expand Up @@ -48,7 +48,7 @@ def test_rewards_as_observation_wrapper_step_method(env_id, dict_obs_space):
else:
assert "obs" in obs
assert "reward" in obs
np.testing.assert_array_equal(obs["reward"], np.array([0.0]))
assert np.allclose(obs["reward"], np.array([0.0]))


@pytest.mark.parametrize("env_id", ENVIRONMENTS.keys())
Expand All @@ -65,7 +65,7 @@ def test_rewards_as_observation_wrapper_reset_method(env_id, dict_obs_space):
else:
assert "obs" in obs
assert "reward" in obs
np.testing.assert_array_equal(obs["reward"], np.array([0.0]))
assert np.allclose(obs["reward"], np.array([0.0]))


@pytest.mark.parametrize("amount", [-1.3, -1, 0])
Expand Down
Loading