Skip to content

Commit da730bc

Browse files
authored
Merge pull request #146 from dee0512/master
Breakout RL experiments
2 parents 2cc97a8 + a3942d8 commit da730bc

File tree

4 files changed

+79
-11
lines changed

4 files changed

+79
-11
lines changed

bindsnet/pipeline/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,6 @@ def __init__(self, network: Network, environment: Environment, encoding: Callabl
7979
for l in self.network.layers:
8080
self.network.add_monitor(Monitor(self.network.layers[l], 's', self.plot_interval * self.time),
8181
name=f'{l}_spikes')
82-
8382
if 'v' in self.network.layers[l].__dict__:
8483
self.network.add_monitor(Monitor(self.network.layers[l], 'v', self.plot_interval * self.time),
8584
name=f'{l}_voltages')

bindsnet/pipeline/action.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,11 +60,12 @@ def select_softmax(pipeline: Pipeline, **kwargs) -> int:
6060
assert pipeline.network.layers[output].n == pipeline.env.action_space.n, \
6161
'Output layer size not equal to size of action space.'
6262

63+
assert hasattr(pipeline, 'spike_record'), 'Pipeline has no attribute named: spike_record.'
64+
6365
# Sum of previous iterations' spikes (Not yet implemented)
64-
spikes = pipeline.network.layers[output].s
66+
spikes = torch.sum(pipeline.spike_record[output], dim=1)
6567
_sum = torch.sum(torch.exp(spikes.float()))
6668

67-
# Choose action based on readout neuron spiking
6869
if _sum == 0:
6970
action = np.random.choice(pipeline.env.action_space.n)
7071
else:

examples/breakout/breakout.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,23 +17,23 @@
1717
out = LIFNodes(n=4, refrac=0, traces=True)
1818

1919
# Connections between layers.
20-
inpt_middle = Connection(source=inpt, target=middle, wmax=1e-2)
21-
middle_out = Connection(source=middle, target=out, wmax=1e-1, nu=2e-2)
20+
inpt_middle = Connection(source=inpt, target=middle, wmin=0, wmax=1e-1)
21+
middle_out = Connection(source=middle, target=out, wmin=0, wmax=1)
2222

2323
# Add all layers and connections to the network.
24-
network.add_layer(inpt, name='X')
25-
network.add_layer(middle, name='Y')
26-
network.add_layer(out, name='Z')
27-
network.add_connection(inpt_middle, source='X', target='Y')
28-
network.add_connection(middle_out, source='Y', target='Z')
24+
network.add_layer(inpt, name='Input Layer')
25+
network.add_layer(middle, name='Hidden Layer')
26+
network.add_layer(out, name='Output Layer')
27+
network.add_connection(inpt_middle, source='Input Layer', target='Hidden Layer')
28+
network.add_connection(middle_out, source='Hidden Layer', target='Output Layer')
2929

3030
# Load Breakout environment.
3131
environment = GymEnvironment('BreakoutDeterministic-v4')
3232
environment.reset()
3333

3434
# Build pipeline from specified components.
3535
pipeline = Pipeline(network, environment, encoding=bernoulli,
36-
action_function=select_softmax, output='Z',
36+
action_function=select_softmax, output='Output Layer',
3737
time=100, history_length=1, delta=1,
3838
plot_interval=1, render_interval=1)
3939

examples/breakout/breakout_stdp.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
import torch
2+
3+
from bindsnet.network import Network
4+
from bindsnet.pipeline import Pipeline
5+
from bindsnet.learning import MSTDP
6+
from bindsnet.encoding import bernoulli
7+
from bindsnet.network.topology import Connection
8+
from bindsnet.environment import GymEnvironment
9+
from bindsnet.network.nodes import Input, LIFNodes
10+
from bindsnet.pipeline.action import select_softmax
11+
12+
# Build network.
13+
network = Network(dt=1.0)
14+
15+
# Layers of neurons.
16+
inpt = Input(n=80 * 80, shape=[80, 80], traces=True)
17+
middle = LIFNodes(n=100, traces=True)
18+
out = LIFNodes(n=4, refrac=0, traces=True)
19+
20+
# Connections between layers.
21+
inpt_middle = Connection(source=inpt, target=middle, wmin=0, wmax=1e-1)
22+
middle_out = Connection(source=middle, target=out, wmin=0, wmax=1, update_rule=MSTDP, nu=1e-1, norm=0.5 * middle.n)
23+
24+
# Add all layers and connections to the network.
25+
network.add_layer(inpt, name='Input Layer')
26+
network.add_layer(middle, name='Hidden Layer')
27+
network.add_layer(out, name='Output Layer')
28+
network.add_connection(inpt_middle, source='Input Layer', target='Hidden Layer')
29+
network.add_connection(middle_out, source='Hidden Layer', target='Output Layer')
30+
31+
# Load Breakout environment.
32+
environment = GymEnvironment('BreakoutDeterministic-v4')
33+
environment.reset()
34+
35+
# Build pipeline from specified components.
36+
pipeline = Pipeline(network, environment, encoding=bernoulli,
37+
action_function=select_softmax, output='Output Layer',
38+
time=100, history_length=1, delta=1,
39+
plot_interval=1, render_interval=1)
40+
41+
42+
# Train agent for 100 episodes.
43+
print("Training: ")
44+
for i in range(100):
45+
pipeline.reset_()
46+
# initialize episode reward
47+
reward = 0
48+
while True:
49+
pipeline.step()
50+
reward += pipeline.reward
51+
if pipeline.done:
52+
break
53+
print("Episode " + str(i) + " reward:", reward)
54+
55+
# stop MSTDP
56+
pipeline.network.learning = False
57+
58+
print("Testing: ")
59+
for i in range(100):
60+
pipeline.reset_()
61+
# initialize episode reward
62+
reward = 0
63+
while True:
64+
pipeline.step()
65+
reward += pipeline.reward
66+
if pipeline.done:
67+
break
68+
print("Episode " + str(i) + " reward:", reward)

0 commit comments

Comments
 (0)