Skip to content

Commit bbdf249

Browse files
committed
update genetic algo skip-unchanged logic
1 parent e332513 commit bbdf249

File tree

1 file changed

+130
-34
lines changed

1 file changed

+130
-34
lines changed

genetic_algo.py

Lines changed: 130 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,54 @@ def _resolve_source(task_dir: str, task_id: int) -> Path:
8080
return Path(paths[0])
8181

8282

83+
def _work_dir_for(
84+
task_dir: str,
85+
task_id: int,
86+
stripper_name: str,
87+
use_zopfli: bool,
88+
) -> Path:
89+
repo_root = Path(__file__).resolve().parent
90+
cache_dir = repo_root / "optimizer_results" / "genetic_algo"
91+
codec = "zopfli" if use_zopfli else "zlib"
92+
return cache_dir / f"{task_dir}-{task_id}-{stripper_name}-{codec}"
93+
94+
95+
def _matches_original_snapshot(
    *,
    task_dir: str,
    task_id: int,
    stripper_name: str,
    use_zopfli: bool,
    source_path: Path,
    snapshot_bytes: Optional[bytes] = None,
) -> tuple[bool, bytes]:
    """Check whether the stored original snapshot equals the current one.

    Args:
        task_dir, task_id, stripper_name, use_zopfli: Identify the GA working
            directory (see ``_work_dir_for``) whose
            ``task{task_id:03d}_original.py`` file is compared.
        source_path: Source file to snapshot; a relative path is resolved to
            an absolute one first.
        snapshot_bytes: Already-computed snapshot bytes. When ``None`` they
            are taken from the third element of
            ``_strip_source_for_snapshot(source_path)``.

    Returns:
        ``(matches, snapshot_bytes)``. ``matches`` is true only when the
        snapshot file could be read and its contents equal ``snapshot_bytes``.
        The bytes are returned so callers can pass them back in on later
        calls instead of recomputing them.
    """
    source_path = Path(source_path)
    if not source_path.is_absolute():
        source_path = source_path.resolve()

    if snapshot_bytes is None:
        _, _, snapshot_bytes = _strip_source_for_snapshot(source_path)

    work_dir = _work_dir_for(task_dir, task_id, stripper_name, use_zopfli)
    original_snapshot_path = work_dir / f"task{task_id:03d}_original.py"

    # Read directly instead of checking exists() first: a missing file raises
    # FileNotFoundError (an OSError), so one handler covers both "absent" and
    # "unreadable" without a gap between the check and the read.
    try:
        stored_bytes = original_snapshot_path.read_bytes()
    except OSError:
        return False, snapshot_bytes

    return stored_bytes == snapshot_bytes, snapshot_bytes
121+
122+
123+
def _clear_ga_inputs(work_dir: Path) -> None:
124+
for name in ("input_deflate.txt", "input_variable.txt", "current_states.txt"):
125+
try:
126+
(work_dir / name).unlink()
127+
except FileNotFoundError:
128+
continue
129+
130+
83131
def _build_variable_dump(code: str) -> str:
84132
occ_text = list_var_occurrences(
85133
code,
@@ -503,7 +551,6 @@ def solve(
503551
use_zopfli: bool = True,
504552
timeout_sec: Optional[int] = None,
505553
source_override: Path | None = None,
506-
skip_if_unchanged: bool = False,
507554
) -> None:
508555
repo_root = Path(__file__).resolve().parent
509556
if source_override is not None:
@@ -552,27 +599,13 @@ def solve(
552599
if not ga_binary.exists():
553600
raise FileNotFoundError(f"geneticalgo binary not found at {ga_binary}")
554601

555-
cache_dir = repo_root / "optimizer_results" / "genetic_algo"
556-
cache_dir.mkdir(parents=True, exist_ok=True)
557-
work_dir = cache_dir / f"{task_dir}-{task_id}-{stripper_name}-{'zopfli' if use_zopfli else 'zlib'}"
602+
work_dir = _work_dir_for(task_dir, task_id, stripper_name, use_zopfli)
603+
work_dir.parent.mkdir(parents=True, exist_ok=True)
558604
output_path = work_dir / "result.deflate"
559605
py_output_path = work_dir / f"task{task_id:03d}.py"
560606

561607
work_dir.mkdir(parents=True, exist_ok=True)
562608
original_snapshot_path = work_dir / f"task{task_id:03d}_original.py"
563-
if skip_if_unchanged and original_snapshot_path.exists():
564-
try:
565-
if original_snapshot_path.read_bytes() == snapshot_bytes:
566-
print(
567-
(
568-
f"[genetic_algo] skip task {task_id:03d} {stripper_name}: "
569-
"source unchanged from snapshot"
570-
),
571-
file=sys.stderr,
572-
)
573-
return
574-
except OSError:
575-
pass
576609
try:
577610
_atomic_write_bytes(original_snapshot_path, snapshot_bytes)
578611
except OSError:
@@ -648,13 +681,30 @@ def _jobs_from_candidates(
648681
skip_if_unchanged: bool,
649682
) -> list[GAJob]:
650683
jobs: list[GAJob] = []
651-
_ = skip_if_unchanged # unused: job submission handles skip at runtime
652684
for cand in candidates:
653685
for entry in cand.entries:
654686
base_path = entry.base_path
655687
task_dir = base_path.parent.name
688+
snapshot_bytes: Optional[bytes] = None
656689
for stripper in entry.strippers:
657690
for use_zopfli in (True, False):
691+
if skip_if_unchanged:
692+
matches, snapshot_bytes = _matches_original_snapshot(
693+
task_dir=task_dir,
694+
task_id=cand.task_id,
695+
stripper_name=stripper,
696+
use_zopfli=use_zopfli,
697+
source_path=base_path,
698+
snapshot_bytes=snapshot_bytes,
699+
)
700+
if matches:
701+
codec = "zopfli" if use_zopfli else "zlib"
702+
label = f"{task_dir}/task{cand.task_id:03d}:{stripper}:{codec}"
703+
print(
704+
f"[genetic_algo] skip {label}: source unchanged from snapshot",
705+
file=sys.stderr,
706+
)
707+
continue
658708
jobs.append(
659709
GAJob(
660710
task_id=cand.task_id,
@@ -818,8 +868,6 @@ def _submit_job(
818868
executor: ThreadPoolExecutor,
819869
job_iter: Iterator[GAJob],
820870
timeout_sec: int,
821-
*,
822-
skip_if_unchanged: bool,
823871
):
824872
job = next(job_iter)
825873
future = executor.submit(
@@ -830,7 +878,6 @@ def _submit_job(
830878
use_zopfli=job.use_zopfli,
831879
timeout_sec=timeout_sec,
832880
source_override=job.base_path,
833-
skip_if_unchanged=skip_if_unchanged,
834881
)
835882
return future, job
836883

@@ -884,7 +931,6 @@ def _run_candidate_autopilot(
884931
executor,
885932
job_iter,
886933
timeout_sec,
887-
skip_if_unchanged=skip_if_unchanged,
888934
)
889935
pending[future] = job
890936

@@ -905,7 +951,6 @@ def _run_candidate_autopilot(
905951
executor,
906952
job_iter,
907953
timeout_sec,
908-
skip_if_unchanged=skip_if_unchanged,
909954
)
910955
pending[future_next] = job_next
911956
except KeyboardInterrupt:
@@ -1023,34 +1068,85 @@ def main(argv: Optional[Sequence[str]] = None) -> int:
10231068
if timeout_sec is not None and timeout_sec <= 0:
10241069
timeout_sec = None
10251070

1071+
assert args.task_dir is not None
1072+
assert args.task_id is not None
1073+
1074+
source_path = _resolve_source(args.task_dir, args.task_id)
1075+
if not source_path.is_absolute():
1076+
source_path = source_path.resolve()
1077+
10261078
common_kwargs = {
10271079
"task_dir": args.task_dir,
10281080
"task_id": args.task_id,
10291081
"timeout_sec": timeout_sec,
1082+
"source_override": source_path,
10301083
}
10311084

1032-
if args.stripper:
1033-
solve(
1034-
stripper_name=args.stripper,
1035-
use_zopfli=args.use_zopfli,
1036-
**common_kwargs,
1037-
skip_if_unchanged=args.skip_unchanged,
1085+
snapshot_bytes_cache: Optional[bytes] = None
1086+
1087+
def should_run(stripper_name: str, use_zopfli: bool) -> bool:
1088+
nonlocal snapshot_bytes_cache
1089+
matches, snapshot_bytes_cache = _matches_original_snapshot(
1090+
task_dir=args.task_dir,
1091+
task_id=args.task_id,
1092+
stripper_name=stripper_name,
1093+
use_zopfli=use_zopfli,
1094+
source_path=source_path,
1095+
snapshot_bytes=snapshot_bytes_cache,
1096+
)
1097+
codec = "zopfli" if use_zopfli else "zlib"
1098+
label = f"{args.task_dir}/task{args.task_id:03d}:{stripper_name}:{codec}"
1099+
if matches:
1100+
print(
1101+
f"[genetic_algo] {label} snapshot matches current source",
1102+
file=sys.stderr,
1103+
)
1104+
if args.skip_unchanged:
1105+
print(
1106+
f"[genetic_algo] skip {label}: source unchanged from snapshot",
1107+
file=sys.stderr,
1108+
)
1109+
return False
1110+
return True
1111+
1112+
print(
1113+
f"[genetic_algo] {label} snapshot differs; resetting GA inputs",
1114+
file=sys.stderr,
10381115
)
1116+
_clear_ga_inputs(_work_dir_for(args.task_dir, args.task_id, stripper_name, use_zopfli))
1117+
return True
1118+
1119+
if args.stripper:
1120+
if should_run(args.stripper, args.use_zopfli):
1121+
solve(
1122+
stripper_name=args.stripper,
1123+
use_zopfli=args.use_zopfli,
1124+
**common_kwargs,
1125+
)
10391126
return 0
10401127

10411128
names = sorted(strippers.keys())
1042-
max_workers = len(names) * 2
1043-
with ThreadPoolExecutor(max_workers=max_workers) as executor:
1129+
combos = [
1130+
(name, use_zopfli)
1131+
for name in names
1132+
for use_zopfli in (True, False)
1133+
if should_run(name, use_zopfli)
1134+
]
1135+
1136+
if not combos:
1137+
return 0
1138+
1139+
max_workers = len(names) * 2 if names else 1
1140+
worker_count = max(1, min(max_workers, len(combos)))
1141+
with ThreadPoolExecutor(max_workers=worker_count) as executor:
10441142
futures = {
10451143
executor.submit(
10461144
solve,
10471145
stripper_name=name,
10481146
use_zopfli=use_zopfli,
10491147
**common_kwargs,
1050-
skip_if_unchanged=args.skip_unchanged,
10511148
): (name, use_zopfli)
1052-
for name in names
1053-
for use_zopfli in [True, False]
1149+
for name, use_zopfli in combos
10541150
}
10551151
for future in as_completed(futures):
10561152
name, use_zopfli = futures[future]

0 commit comments

Comments
 (0)