# Reconstructed from a whitespace-mangled git patch.
# File: chapter7_tricks/hw_submission/q1/q1.py
from typing import Optional, Tuple, List
import torch
import torch.nn as nn
import treetensor.torch as ttorch


class PPOFModel(nn.Module):
    """Actor-critic network for PPO with a shared convolutional encoder.

    The encoder maps stacked-frame image observations to a flat feature
    vector; the actor head outputs the mean of a Gaussian policy (with a
    state-independent learned log-sigma) and the critic head outputs a
    scalar state value.
    """

    # Supported forward modes; ``forward`` dispatches on these names.
    mode = ['compute_actor', 'compute_critic', 'compute_actor_critic']

    def __init__(
        self,
        obs_shape: Tuple[int],
        action_shape: int,
        encoder_hidden_size_list: Optional[List[int]] = None,
        actor_head_hidden_size: int = 64,
        actor_head_layer_num: int = 1,
        critic_head_hidden_size: int = 64,
        critic_head_layer_num: int = 1,
        activation: Optional[nn.Module] = None,
    ) -> None:
        """Build encoder, critic head, actor head and init their weights.

        Arguments:
            obs_shape: channel-first observation shape, e.g. ``(4, 84, 84)``.
            action_shape: dimension of the continuous action space.
            encoder_hidden_size_list: conv channel sizes; defaults to
                ``[128, 128, 64]`` (``None`` sentinel avoids the mutable
                default-argument pitfall).
            activation: activation module; defaults to a fresh ``nn.ReLU()``
                (``None`` sentinel avoids sharing an instance created at
                function-definition time across all model instances).
        """
        super().__init__()
        if encoder_hidden_size_list is None:
            encoder_hidden_size_list = [128, 128, 64]
        if activation is None:
            activation = nn.ReLU()
        self.obs_shape, self.action_shape = obs_shape, action_shape

        # Encoder: Atari-style conv stack (kernels 8/4/3, strides 4/2/1) + flatten.
        layers = []
        input_size = obs_shape[0]
        kernel_size_list = [8, 4, 3]
        stride_list = [4, 2, 1]
        for i in range(len(encoder_hidden_size_list)):
            output_size = encoder_hidden_size_list[i]
            layers.append(nn.Conv2d(input_size, output_size, kernel_size_list[i], stride_list[i]))
            layers.append(activation)
            input_size = output_size
        layers.append(nn.Flatten())
        self.encoder = nn.Sequential(*layers)

        # A dummy forward pass determines the flattened feature width.
        flatten_size = input_size = self.get_flatten_size()
        # Critic head: MLP trunk -> scalar value.
        layers = []
        for _ in range(critic_head_layer_num):
            layers.append(nn.Linear(input_size, critic_head_hidden_size))
            layers.append(activation)
            input_size = critic_head_hidden_size
        layers.append(nn.Linear(critic_head_hidden_size, 1))
        self.critic = nn.Sequential(*layers)
        # Actor head: MLP trunk, mean layer, state-independent log-sigma.
        layers = []
        input_size = flatten_size
        for _ in range(actor_head_layer_num):
            layers.append(nn.Linear(input_size, actor_head_hidden_size))
            layers.append(activation)
            input_size = actor_head_hidden_size
        self.actor = nn.Sequential(*layers)
        self.mu = nn.Linear(actor_head_hidden_size, action_shape)
        self.log_sigma = nn.Parameter(torch.zeros(1, action_shape))

        # Orthogonal weight init (PPO training trick).
        self.init_weights()

    def init_weights(self) -> None:
        """Orthogonally initialize all conv/linear layers and zero biases.

        The policy-mean layer is re-initialized with a small gain (0.01)
        so the initial policy output stays near zero — a common PPO
        stabilization trick.
        """

        def orthogonal_init(layer, gain=1.0):
            nn.init.orthogonal_(layer.weight, gain=gain)
            nn.init.constant_(layer.bias, 0)

        for m in self.modules():
            # Conv2d and Linear get identical treatment; merged branches.
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                orthogonal_init(m)

        # Output (mean) layer: small gain overrides the loop's init above.
        orthogonal_init(self.mu, gain=0.01)

    def get_flatten_size(self) -> int:
        """Return the encoder output width for a single observation."""
        test_data = torch.randn(1, *self.obs_shape)
        with torch.no_grad():
            output = self.encoder(test_data)
        return output.shape[1]

    def forward(self, inputs: ttorch.Tensor, mode: str) -> ttorch.Tensor:
        """Dispatch to one of the ``compute_*`` methods named in ``self.mode``."""
        assert mode in self.mode, "not support forward mode: {}/{}".format(mode, self.mode)
        return getattr(self, mode)(inputs)

    def compute_actor(self, x: ttorch.Tensor) -> ttorch.Tensor:
        """Return Gaussian policy parameters ``{'mu', 'sigma'}``."""
        x = self.encoder(x)
        x = self.actor(x)
        mu = self.mu(x)
        log_sigma = self.log_sigma + torch.zeros_like(mu)  # addition aims to broadcast shape
        sigma = torch.exp(log_sigma)
        return ttorch.as_tensor({'mu': mu, 'sigma': sigma})

    def compute_critic(self, x: ttorch.Tensor) -> ttorch.Tensor:
        """Return the scalar state value, shape ``(B, 1)``."""
        x = self.encoder(x)
        value = self.critic(x)
        return value

    def compute_actor_critic(self, x: ttorch.Tensor) -> ttorch.Tensor:
        """Encode once, then return both policy parameters and value."""
        x = self.encoder(x)
        value = self.critic(x)
        x = self.actor(x)
        mu = self.mu(x)
        log_sigma = self.log_sigma + torch.zeros_like(mu)  # addition aims to broadcast shape
        sigma = torch.exp(log_sigma)
        return ttorch.as_tensor({'logit': {'mu': mu, 'sigma': sigma}, 'value': value})


def test_ppof_model() -> None:
    """Smoke-test all three forward modes with Atari-shaped input."""
    model = PPOFModel((4, 84, 84), 5)
    print(model)
    data = torch.randn(3, 4, 84, 84)
    output = model(data, mode='compute_critic')
    assert output.shape == (3, 1)
    output = model(data, mode='compute_actor')
    assert output.mu.shape == (3, 5)
    assert output.sigma.shape == (3, 5)
    output = model(data, mode='compute_actor_critic')
    assert output.value.shape == (3, 1)
    assert output.logit.mu.shape == (3, 5)
    assert output.logit.sigma.shape == (3, 5)
    print('End...')


if __name__ == "__main__":
    test_ppof_model()
# Reconstructed from a whitespace-mangled git patch.
# File: chapter7_tricks/hw_submission/q2/q2.py
# DI-engine DQN config for Atari Qbert (serial pipeline). All values are
# reproduced exactly; only layout and commentary differ.
from easydict import EasyDict

qbert_dqn_config = dict(
    exp_name='qbert_dqn_seed0',
    # Environment: 8 parallel collectors/evaluators, 4-frame stacking.
    env=dict(
        collector_env_num=8,
        evaluator_env_num=8,
        n_evaluator_episode=8,
        stop_value=30000,
        env_id='Qbert-v4',
        # 'ALE/Qbert-v5' is available. But special setting is needed after gym make.
        frame_stack=4,
    ),
    # Policy: 3-step DQN with epsilon-greedy exploration.
    policy=dict(
        cuda=True,
        priority=False,
        model=dict(
            obs_shape=[4, 84, 84],
            action_shape=6,
            encoder_hidden_size_list=[128, 128, 512],
        ),
        nstep=3,
        discount_factor=0.99,
        learn=dict(
            update_per_collect=10,
            batch_size=32,
            learning_rate=0.0001,
            target_update_freq=500,
        ),
        collect=dict(n_sample=100, ),
        eval=dict(evaluator=dict(eval_freq=4000, )),
        other=dict(
            # Exponential epsilon decay from 1.0 to 0.05 over 1M steps.
            eps=dict(
                type='exp',
                start=1.,
                end=0.05,
                decay=1000000,
            ),
            replay_buffer=dict(replay_buffer_size=400000, ),
        ),
    ),
)
qbert_dqn_config = EasyDict(qbert_dqn_config)
main_config = qbert_dqn_config

qbert_dqn_create_config = dict(
    env=dict(
        type='atari',
        import_names=['dizoo.atari.envs.atari_env'],
    ),
    env_manager=dict(type='subprocess'),
    policy=dict(type='dqn'),
)
qbert_dqn_create_config = EasyDict(qbert_dqn_create_config)
create_config = qbert_dqn_create_config

if __name__ == '__main__':
    # or you can enter ding -m serial -c qbert_dqn_config.py -s 0
    from ding.entry import serial_pipeline
    serial_pipeline((main_config, create_config), seed=0)
"b/chapter7_tricks/hw_submission/q2/qbert\350\277\220\350\241\214\346\210\252\345\233\276.png" new file mode 100644 index 0000000..ff722b0 Binary files /dev/null and "b/chapter7_tricks/hw_submission/q2/qbert\350\277\220\350\241\214\346\210\252\345\233\276.png" differ