Skip to content

Commit 12ae9a8

Browse files
add recording to parquet
1 parent 505abf3 commit 12ae9a8

9 files changed

Lines changed: 429 additions & 15 deletions

File tree

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,6 @@ cov/*
2424
out
2525
.idea
2626
clojush.iml
27-
.gorilla-port
27+
.gorilla-port
28+
/clojush/
29+
.ipynb_checkpoints/

project.clj

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
(defproject clojush "2.30.0-1-SNAPSHOT"
1+
(defproject clojush "2.30.0-1-SNAPSHOT"
22
:description "The Push programming language and the PushGP genetic programming
3-
system implemented in Clojure. See http://pushlanguage.com"
3+
system implemented in Clojure. See http://pushlanguage.com"
44
:license {:name "Eclipse Public License"
55
:url "http://www.eclipse.org/legal/epl-v10.html"}
6-
:dependencies [[org.clojure/clojure "1.8.0"]
6+
:dependencies [[org.clojure/clojure "1.9.0-alpha15"]
77
[org.clojars.etosch/cosmos "1.0.0"]
88
[org.clojure/math.numeric-tower "0.0.4"]
99
[org.clojure/math.combinatorics "0.1.1"]
@@ -12,8 +12,9 @@
1212
[clojure-csv "2.0.1"]
1313
[org.clojure/data.json "0.2.6"]
1414
[clj-random "0.1.7"]
15-
;; https://mvnrepository.com/artifact/org.apache.commons/commons-math3
16-
[org.apache.commons/commons-math3 "3.2"]]
15+
;; https://mvnrepository.com/artifact/org.apache.commons/commons-math3
16+
[org.apache.commons/commons-math3 "3.2"]
17+
[ici.recorder "0.2.0"]]
1718
:plugins [[lein-codox "0.9.1"]
1819
[lein-shell "0.5.0"]
1920
[lein-gorilla "0.4.0"]
@@ -53,4 +54,3 @@
5354
;;"-XX:+UseG1GC"
5455
;:jvm-opts ["-Xmx12g" "-Xms12g" "-XX:+UseParallelGC"]
5556
:main clojush.core)
56-

run-fly

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ ssh ${fly_user}@fly.hampshire.edu /opt/pixar/tractor-blade-1.7.2/tractor-spool.p
4949
--jobcwd="${repodir}" \
5050
--priority=1 \
5151
--range 1-${n} \
52-
-c "bash -c 'env JAVA_CMD=/usr/java/latest/bin/java /share/apps/bin/lein run $lein_command > $outputdir/RANGE.out 2> $outputdir/RANGE.err'"
52+
-c "bash -c 'env JAVA_CMD=/usr/java/latest/bin/java /share/apps/bin/lein run $lein_command :label $label > $outputdir/RANGE.out 2> $outputdir/RANGE.err'"
5353

5454

5555
echo "Job ID: ${number}"

src/clojush/args.clj

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -406,7 +406,7 @@
406406
;; When true, will exit the run when there is an individual with a zero-error vector
407407

408408
;;----------------------------------------
409-
;; Arguments related to printing JSON, EDN, or CSV logs
409+
;; Arguments related to printing JSON, EDN, CSV, or Parquet logs
410410
;;----------------------------------------
411411

412412
:print-csv-logs false
@@ -445,6 +445,9 @@
445445

446446
:json-log-program-strings false
447447
;; If true, JSON logs will include program strings for each individual.
448+
449+
:label nil
450+
;; If set, parquet will save this label as a string as metadata about the run
448451
)))
449452

450453
(defn load-push-argmap

src/clojush/core.clj

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
;; for more details.
1717

1818
(ns clojush.core
19+
(:require [clojush.pushgp.record :as r])
1920
(:use [clojush.pushgp pushgp report])
2021
(:gen-class))
2122

@@ -30,12 +31,13 @@
3031
This allows one to run an example with a call from the OS shell prompt like:
3132
lein run examples.simple-regression :population-size 3000"
3233
[& args]
34+
(r/new-run!)
3335
(println "Command line args:" (apply str (interpose \space args)))
3436
(let [param-list (map #(if (.endsWith % ".ser")
3537
(str %)
3638
(read-string %))
3739
(rest args))]
38-
(require (symbol (first args)))
40+
(require (symbol (r/config-data! [:problem-file] (first args))))
3941
(let [example-params (eval (symbol (str (first args) "/argmap")))
4042
params (merge example-params (apply sorted-map param-list))]
4143
(println "######################################")
Lines changed: 277 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,277 @@
1+
(ns clojush.pushgp.parent-selection
2+
(:use [clojush random globals util])
3+
(:require [clojure.set :as set]
4+
[clojush.pushgp.record :as r]))
5+
6+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7+
;; tournament selection
8+
(defn tournament-selection
9+
"Returns an individual that does the best out of a tournament."
10+
[pop location {:keys [tournament-size trivial-geography-radius
11+
total-error-method]}]
12+
(let [tournament-set
13+
(doall
14+
(for [_ (range tournament-size)]
15+
(nth pop
16+
(if (zero? trivial-geography-radius)
17+
(lrand-int (count pop))
18+
(mod (+ location (- (lrand-int (+ 1 (* trivial-geography-radius 2)))
19+
trivial-geography-radius))
20+
(count pop))))))
21+
err-fn (case total-error-method
22+
:sum :total-error
23+
(:hah :rmse :ifs) :weighted-error
24+
(throw (Exception. (str "Unrecognized argument for total-error-method: "
25+
total-error-method))))]
26+
(reduce (fn [i1 i2] (if (< (err-fn i1) (err-fn i2)) i1 i2))
27+
tournament-set)))
28+
29+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
30+
;; lexicase selection
31+
32+
(defn retain-one-individual-per-error-vector
33+
"Retains one random individual to represent each error vector."
34+
[pop]
35+
(map lrand-nth (vals (group-by #(:errors %) pop))))
36+
37+
(defn possibly-remove-individuals-with-empty-genomes
38+
"When :autoconstuctive is truthy, and at least one individual in pop has a non-empty
39+
genome, remove those with empty genomes."
40+
[pop {:keys [autoconstructive]}]
41+
(if autoconstructive
42+
(let [with-non-empty-genomes (filter #(not (empty? (:genome %))) pop)]
43+
(if (not (empty? with-non-empty-genomes))
44+
with-non-empty-genomes
45+
pop))
46+
pop))
47+
48+
(defn youth-bias
49+
"If lexicase-youth-bias is falsy, returns pop. Otherwise, lexicase-youth-bias should
50+
be a vector of [pmin pmax] with pmin and pmax both being between 0 and 1 (inclusive)
51+
with pmin + pmax <= 1.0. Then, with probability pmin, returns individuals in pop
52+
with age @min-age; with probability pmax, returns all of pop; with probability
53+
(- 1.0 pmin pmax), selects an age cutoff uniformly from those present in the population
54+
and returns individuals with the cutoff age or lower."
55+
[pop {:keys [lexicase-youth-bias]}]
56+
(if (not lexicase-youth-bias)
57+
pop
58+
(let [rand-val (lrand)
59+
age-limit (if (<= rand-val (first lexicase-youth-bias))
60+
@min-age
61+
(if (<= rand-val (apply + lexicase-youth-bias))
62+
@max-age
63+
(lrand-nth (distinct (map :age pop)))))]
64+
(filter (fn [ind] (<= (:age ind) age-limit))
65+
pop))))
66+
67+
(defn lexicase-selection
68+
"Returns an individual that does the best on the fitness cases when considered one at a
69+
time in random order. If trivial-geography-radius is non-zero, selection is limited to
70+
parents within +/- r of location"
71+
[pop location {:keys [trivial-geography-radius] :as argmap}]
72+
(let [lower (mod (- location trivial-geography-radius) (count pop))
73+
upper (mod (+ location trivial-geography-radius) (count pop))
74+
popvec (vec pop)
75+
subpop (youth-bias
76+
(if (zero? trivial-geography-radius)
77+
pop
78+
(if (< lower upper)
79+
(subvec popvec lower (inc upper))
80+
(into (subvec popvec lower (count pop))
81+
(subvec popvec 0 (inc upper)))))
82+
argmap)]
83+
(loop [survivors (retain-one-individual-per-error-vector
84+
(possibly-remove-individuals-with-empty-genomes
85+
subpop argmap))
86+
cases (lshuffle (range (count (:errors (first subpop)))))]
87+
(if (or (empty? cases)
88+
(empty? (rest survivors))
89+
(< (lrand) (:lexicase-slippage argmap)))
90+
(lrand-nth survivors)
91+
(let [min-err-for-case (apply min (map #(nth % (first cases))
92+
(map #(:errors %) survivors)))]
93+
(recur (filter #(= (nth (:errors %) (first cases)) min-err-for-case)
94+
survivors)
95+
(rest cases)))))))
96+
97+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
98+
;; epsilon lexicase selection
99+
100+
(defn mad
101+
"returns median absolute deviation (MAD)"
102+
[x]
103+
(let [; Get median of x
104+
x-median (median x)
105+
; calculate absolute deviation from median
106+
dev (map #(Math/abs (float (- % x-median)))
107+
x)]
108+
(median dev)))
109+
110+
(defn calculate-epsilons-for-epsilon-lexicase
111+
"Calculates the epsilon values for epsilon lexicase selection. Only runs once
112+
per generation. "
113+
[pop-agents {:keys [epsilon-lexicase-epsilon]}]
114+
(when (not epsilon-lexicase-epsilon)
115+
(let [pop (map deref pop-agents)
116+
test-case-errors (apply map list (map :errors pop))
117+
meta-case-errors (apply map list (map :meta-errors pop))
118+
all-errors (concat test-case-errors meta-case-errors)
119+
epsilons (map mad all-errors)]
120+
(println "Epsilons for epsilon lexicase:"
121+
(r/generation-data! [:epsilons] epsilons))
122+
(reset! epsilons-for-epsilon-lexicase epsilons))))
123+
124+
(defn epsilon-lexicase-selection
125+
"Returns an individual that does within epsilon of the best on the fitness cases when
126+
considered one at a time in random order. If trivial-geography-radius is non-zero,
127+
selection is limited to parents within +/- r of location"
128+
[pop location {:keys [trivial-geography-radius epsilon-lexicase-epsilon]}]
129+
(let [lower (mod (- location trivial-geography-radius) (count pop))
130+
upper (mod (+ location trivial-geography-radius) (count pop))
131+
popvec (vec pop)
132+
subpop (if (zero? trivial-geography-radius)
133+
pop
134+
(if (< lower upper)
135+
(subvec popvec lower (inc upper))
136+
(into (subvec popvec lower (count pop))
137+
(subvec popvec 0 (inc upper)))))]
138+
(loop [survivors (retain-one-individual-per-error-vector subpop)
139+
cases (lshuffle (range (count (:errors (first subpop)))))]
140+
(if (or (empty? cases)
141+
(empty? (rest survivors)))
142+
(lrand-nth survivors)
143+
(let [; If epsilon-lexicase-epsilon is set in the argmap, use it for epsilon.
144+
; Otherwise, use automatic epsilon selections, which are calculated once per generation.
145+
epsilon (if epsilon-lexicase-epsilon
146+
epsilon-lexicase-epsilon
147+
(nth @epsilons-for-epsilon-lexicase (first cases)))
148+
min-err-for-case (apply min (map #(nth % (first cases))
149+
(map #(:errors %) survivors)))]
150+
(recur (filter #(<= (nth (:errors %)
151+
(first cases))
152+
(+ min-err-for-case
153+
epsilon))
154+
survivors)
155+
(rest cases)))))))
156+
157+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
158+
;; elitegroup lexicase selection
159+
160+
(defn build-elitegroups
161+
"Builds a sequence that partitions the cases into sub-sequences, with cases
162+
grouped when they produce the same set of elite individuals in the population.
163+
In addition, if group A produces population subset PS(A), and group B
164+
produces population subset PS(B), and PS(A) is a proper subset of PS(B), then
165+
group B is discarded. "
166+
[pop-agents]
167+
(println "Building case elitegroups...")
168+
(let [pop (retain-one-individual-per-error-vector (map deref pop-agents))
169+
cases (range (count (:errors (first pop))))
170+
elites (map (fn [c]
171+
(let [min-error-for-case
172+
(apply min (map #(nth % c) (map :errors pop)))]
173+
(filter #(== (nth (:errors %) c) min-error-for-case)
174+
pop)))
175+
cases)
176+
all-elitegroups (vals (group-by #(nth elites %) cases))
177+
pruned-elitegroups (filter (fn [eg]
178+
(let [e (set (nth elites (first eg)))]
179+
(not-any?
180+
(fn [eg2]
181+
(let [e2 (set (nth elites (first eg2)))]
182+
(and (not= e e2)
183+
(set/subset? e2 e))))
184+
all-elitegroups)))
185+
all-elitegroups)]
186+
(reset! elitegroups pruned-elitegroups)
187+
(println (count @elitegroups) "elitegroups:" @elitegroups)))
188+
189+
(defn elitegroup-lexicase-selection
190+
"Returns an individual produced by elitegroup lexicase selection."
191+
[pop]
192+
(loop [survivors (retain-one-individual-per-error-vector pop)
193+
cases (lshuffle (map lrand-nth @elitegroups))]
194+
(if (or (empty? cases)
195+
(empty? (rest survivors)))
196+
(lrand-nth survivors)
197+
(let [min-err-for-case (apply min (map #(nth % (first cases))
198+
(map #(:errors %) survivors)))]
199+
(recur (filter #(= (nth (:errors %) (first cases)) min-err-for-case)
200+
survivors)
201+
(rest cases))))))
202+
203+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
204+
;; implicit fitness sharing
205+
206+
(defn assign-ifs-error-to-individual
207+
"Takes an individual and calculates and assigns its IFS based on the summed
208+
error across each test case."
209+
[ind summed-reward-on-test-cases]
210+
(let [ifs-reward (apply +' (map #(if (zero? %2) 1.0 (/ %1 %2))
211+
(map #(- 1.0 %) (:errors ind))
212+
summed-reward-on-test-cases))
213+
ifs-er (cond
214+
(< 1e20 ifs-reward) 0.0
215+
(zero? ifs-reward) 1e20
216+
(< 1e20 (/ 1.0 ifs-reward)) 1e20
217+
:else (/ 1.0 ifs-reward))]
218+
(assoc ind :weighted-error ifs-er)))
219+
220+
(defn calculate-implicit-fitness-sharing
221+
"Calculates the summed fitness for each test case, and then uses it to
222+
assign an implicit fitness sharing error to each individual. Assumes errors
223+
are in range [0,1] with 0 being a solution."
224+
[pop-agents {:keys [use-single-thread]}]
225+
(println "\nCalculating implicit fitness sharing errors...")
226+
(let [pop (map deref pop-agents)
227+
summed-reward-on-test-cases (map (fn [list-of-errors]
228+
(reduce +' (map #(- 1.0 %) list-of-errors)))
229+
(apply map list (map :errors pop)))]
230+
(println "Implicit fitness sharing reward per test case (lower means population performs worse):")
231+
(println summed-reward-on-test-cases)
232+
(assert (every? (fn [error] (< -0.0000001 error 1.0000001))
233+
(flatten (map :errors pop)))
234+
(str "All errors must be in range [0,1]. Please normalize them. Here are the first 20 offending errors:\n"
235+
(not-lazy (take 20 (filter (fn [error] (not (< 0.0 error 1.0)))
236+
(flatten (map :errors pop)))))))
237+
(dorun (map #((if use-single-thread swap! send)
238+
%
239+
assign-ifs-error-to-individual
240+
summed-reward-on-test-cases)
241+
pop-agents))
242+
(when-not use-single-thread (apply await pop-agents)))) ;; SYNCHRONIZE
243+
244+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
245+
;; uniform selection (i.e. no selection, for use as a baseline)
246+
247+
(defn uniform-selection
248+
"Returns an individual uniformly at random."
249+
[pop]
250+
(lrand-nth pop))
251+
252+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
253+
;; parent selection
254+
255+
(defn select
256+
"Returns a selected parent."
257+
[pop location {:keys [parent-selection print-selection-counts]
258+
:as argmap}]
259+
(let [pop-with-meta-errors (map (fn [ind] (update-in ind [:errors] concat (:meta-errors ind)))
260+
pop)
261+
selected (case parent-selection
262+
:tournament (tournament-selection pop-with-meta-errors location argmap)
263+
:lexicase (lexicase-selection pop-with-meta-errors location argmap)
264+
:epsilon-lexicase (epsilon-lexicase-selection pop-with-meta-errors location argmap)
265+
:elitegroup-lexicase (elitegroup-lexicase-selection pop-with-meta-errors)
266+
:leaky-lexicase (if (< (lrand) (:lexicase-leakage argmap))
267+
(uniform-selection pop-with-meta-errors)
268+
(lexicase-selection pop-with-meta-errors location argmap))
269+
:uniform (uniform-selection pop-with-meta-errors)
270+
(throw (Exception. (str "Unrecognized argument for parent-selection: "
271+
parent-selection))))]
272+
(when print-selection-counts
273+
(swap! selection-counts update-in [(:uuid selected)] (fn [sel-count]
274+
(if (nil? sel-count)
275+
1
276+
(inc sel-count)))))
277+
selected))

0 commit comments

Comments
 (0)