Skip to content
Open

LSTM #147

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ repos:
hooks:
- id: black
language_version: python3.9
- repo: https://gitlab.com/pycqa/flake8
- repo: https://github.com/pycqa/flake8
rev: '3.9.2'
hooks:
- id: flake8
Expand Down
111 changes: 85 additions & 26 deletions pax/agents/ppo/networks.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional, Tuple
from typing import Optional, Tuple, NamedTuple

import distrax
import haiku as hk
Expand Down Expand Up @@ -44,24 +44,24 @@ def __init__(
):
super().__init__(name=name)
self._action_body = hk.nets.MLP(
[64, 64],
w_init=hk.initializers.Orthogonal(jnp.sqrt(2)),
b_init=hk.initializers.Constant(0),
activate_final=True,
activation=jnp.tanh,
)
[64, 64],
w_init=hk.initializers.Orthogonal(jnp.sqrt(2)),
b_init=hk.initializers.Constant(0),
activate_final=True,
activation=jnp.tanh,
)
self._logit_layer = hk.Linear(
num_values,
w_init=hk.initializers.Orthogonal(1.0),
b_init=hk.initializers.Constant(0),
)
self._value_body = hk.nets.MLP(
[64, 64],
w_init=hk.initializers.Orthogonal(jnp.sqrt(2)),
b_init=hk.initializers.Constant(0),
activate_final=True,
activation=jnp.tanh,
)
[64, 64],
w_init=hk.initializers.Orthogonal(jnp.sqrt(2)),
b_init=hk.initializers.Constant(0),
activate_final=True,
activation=jnp.tanh,
)
self._value_layer = hk.Linear(
1,
w_init=hk.initializers.Orthogonal(0.01),
Expand Down Expand Up @@ -316,16 +316,13 @@ def forward_fn(inputs):
network = hk.without_apply_rng(hk.transform(forward_fn))
return network


def make_sarl_network(num_actions: int):
"""Creates a hk network using the baseline hyperparameters from OpenAI"""

def forward_fn(inputs):
layers = []
layers.extend(
[
CategoricalValueHeadSeparate(num_values=num_actions)
]
)
layers.extend([CategoricalValueHeadSeparate(num_values=num_actions)])
policy_value_network = hk.Sequential(layers)
return policy_value_network(inputs)

Expand All @@ -350,6 +347,25 @@ def forward_fn(
return network, hidden_state


def make_LSTM_ipd_network(num_actions: int, args):
hidden = jnp.zeros((1, args.ppo.hidden_size))
cell = jnp.zeros((1, args.ppo.hidden_size))
hidden_state = hk.LSTMState(hidden=hidden, cell=cell)

def forward_fn(
inputs: jnp.ndarray, state: NamedTuple
) -> Tuple[Tuple[jnp.ndarray, jnp.ndarray], NamedTuple]:
"""forward function"""
lstm = hk.LSTM(args.ppo.hidden_size)
embedding, state = lstm(inputs, state)
logits, values = CategoricalValueHead(num_actions)(embedding)
return (logits, values), state

network = hk.without_apply_rng(hk.transform(forward_fn))

return network, hidden_state


def make_GRU_cartpole_network(num_actions: int):
hidden_size = 256
hidden_state = jnp.zeros((1, hidden_size))
Expand All @@ -358,15 +374,27 @@ def forward_fn(
inputs: jnp.ndarray, state: jnp.ndarray
) -> Tuple[Tuple[jnp.ndarray, jnp.ndarray], jnp.ndarray]:
"""forward function"""
torso = hk.nets.MLP(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this being removed?

[hidden_size, hidden_size],
w_init=hk.initializers.Orthogonal(jnp.sqrt(2)),
b_init=hk.initializers.Constant(0),
activate_final=True,
)
gru = hk.GRU(hidden_size)
embedding = torso(inputs)
embedding, state = gru(embedding, state)
embedding, state = gru(inputs, state)
logits, values = CategoricalValueHead(num_actions)(embedding)
return (logits, values), state

network = hk.without_apply_rng(hk.transform(forward_fn))

return network, hidden_state


def make_LSTM_cartpole_network(num_actions: int, args):
hidden = jnp.zeros((1, args.ppo.hidden_size))
cell = jnp.zeros((1, args.ppo.hidden_size))
hidden_state = hk.LSTMState(hidden=hidden, cell=cell)

def forward_fn(
inputs: jnp.ndarray, state: jnp.ndarray
) -> Tuple[Tuple[jnp.ndarray, jnp.ndarray], jnp.ndarray]:
"""forward function"""
lstm = hk.LSTM(args.ppo.hidden_size)
embedding, state = lstm(inputs, state)
logits, values = CategoricalValueHead(num_actions)(embedding)
return (logits, values), state

Expand Down Expand Up @@ -404,6 +432,37 @@ def forward_fn(
return network, hidden_state


def make_LSTM_coingame_network(num_actions: int, args):
hidden = jnp.zeros((1, args.ppo.hidden_size))
cell = jnp.zeros((1, args.ppo.hidden_size))
hidden_state = hk.LSTMState(hidden=hidden, cell=cell)

def forward_fn(
inputs: jnp.ndarray, state: jnp.ndarray
) -> Tuple[Tuple[jnp.ndarray, jnp.ndarray], jnp.ndarray]:

if args.ppo.with_cnn:
torso = CNN(args)(inputs)

else:
torso = hk.nets.MLP(
[args.ppo.hidden_size],
w_init=hk.initializers.Orthogonal(jnp.sqrt(2)),
b_init=hk.initializers.Constant(0),
activate_final=True,
activation=jnp.tanh,
)
lstm = hk.LSTM(args.ppo.hidden_size)
embedding = torso(inputs)
embedding, state = lstm(embedding, state)
logits, values = CategoricalValueHead(num_actions)(embedding)

return (logits, values), state

network = hk.without_apply_rng(hk.transform(forward_fn))
return network, hidden_state


def test_GRU():
key = jax.random.PRNGKey(seed=0)
num_actions = 2
Expand Down
Loading