-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy pathlog_parser.py
More file actions
112 lines (99 loc) · 3.67 KB
/
log_parser.py
File metadata and controls
112 lines (99 loc) · 3.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import re
import csv
from datetime import datetime
from pathlib import Path
# Directory holding the PrepAgent.log and MDAgent.log files read by parse_logs().
LOG_DIR = Path("agent_logs")
# Path of the CSV summary that parse_logs() writes.
OUT_FILE = "run_summary.csv"
# Value written to the "Model" column of every summary row.
MODEL_NAME = "claude-3-5-haiku-20241022" # update this for each model run
def parse_timestamp(line: str) -> "datetime | None":
    """Return the timestamp that prefixes a log line, or ``None``.

    The text before the first ``" - "`` separator is parsed with the format
    ``%Y-%m-%d %H:%M:%S,%f`` (for example ``2024-05-01 13:45:10,123``).

    Args:
        line: One line of log text.

    Returns:
        A naive ``datetime`` when the prefix matches the format; ``None`` when
        it does not, or when ``line`` is not a string.
    """
    try:
        prefix = line.split(" - ", 1)[0]
        return datetime.strptime(prefix, "%Y-%m-%d %H:%M:%S,%f")
    except (AttributeError, TypeError, ValueError):
        # Best-effort by design: callers treat a missing timestamp as
        # "unknown". Only the failures a malformed or non-string line can
        # cause are swallowed, so unrelated bugs still surface.
        return None
# Captures the tool name that follows an "Executing tool:" marker. Compiled
# once because it is applied to every matching line of both logs.
_TOOL_NAME_RE = re.compile(r"Executing tool:\s*(\w+)")

# Column order of the CSV summary.
_SUMMARY_FIELDS = [
    "Model",
    "Protein",
    "Ligand",
    "Iterations",
    "Total Time (s)",
    "Subtasks Attempted",
    "Subtasks Successful",
    "Tools Called",
]


def _parse_prep_runs(log_text):
    """Split PrepAgent log text into one metadata dict per run."""
    runs = []
    current = None
    for line in log_text.splitlines():
        if "PrepAgent initialized." in line:
            # Each initialization marker closes the previous run, if any.
            if current is not None:
                runs.append(current)
            current = {"tools": set(), "start_time": parse_timestamp(line)}
        elif current is None:
            # Lines ahead of the first initialization marker belong to no
            # run; skipping them avoids building a run without "tools".
            continue
        elif "User input:" in line:
            current["protein"] = line.split("User input:")[1].strip()
        elif "User requested ligand:" in line:
            current["ligand"] = line.split("User requested ligand:")[1].strip()
        elif "Executing tool:" in line:
            tool_match = _TOOL_NAME_RE.search(line)
            if tool_match:
                current["tools"].add(tool_match.group(1))
    if current is not None:
        runs.append(current)
    return runs


def _parse_md_runs(log_text):
    """Split MDAgent log text into one counters dict per run."""
    runs = []
    current = None
    for line in log_text.splitlines():
        if "MDAgent initialized." in line:
            if current is not None:
                runs.append(current)
            current = {
                "start_time": parse_timestamp(line),
                "iterations": 0,
                "tools_called": set(),
                "attempted": 0,
                "success": 0,
            }
        elif current is None:
            # Nothing to attribute the line to before the first run starts.
            continue
        elif "Logging agent iteration" in line:
            current["iterations"] += 1
        elif "Executing tool:" in line:
            # Every execution line counts as an attempt, even when the tool
            # name cannot be extracted from it.
            current["attempted"] += 1
            tool_match = _TOOL_NAME_RE.search(line)
            if tool_match:
                current["tools_called"].add(tool_match.group(1))
        elif "Tool result:" in line:
            if "Error" not in line:
                current["success"] += 1
        elif "MD Pipeline completed successfully" in line:
            current["end_time"] = parse_timestamp(line)
    if current is not None:
        runs.append(current)
    return runs


def _build_rows(prep_runs, md_runs, model_name):
    """Pair PrepAgent and MDAgent runs by position and build CSV rows."""
    rows = []
    for index, prep in enumerate(prep_runs):
        # A PrepAgent run without an MDAgent counterpart gets default values.
        md = md_runs[index] if index < len(md_runs) else {}
        start = md.get("start_time")
        end = md.get("end_time")
        if start is not None and end is not None:
            total_time = (end - start).total_seconds()
        else:
            # "end_time" is only recorded when the completion line was seen.
            total_time = None
        # Fall back to the PrepAgent tools only when there is no MDAgent run.
        tools = md["tools_called"] if "tools_called" in md else prep["tools"]
        rows.append(
            {
                "Model": model_name,
                "Protein": prep.get("protein", "Unknown"),
                "Ligand": prep.get("ligand", "None"),
                "Iterations": md.get("iterations", 0),
                "Total Time (s)": total_time,
                "Subtasks Attempted": md.get("attempted", 0),
                "Subtasks Successful": md.get("success", 0),
                "Tools Called": ", ".join(sorted(tools)),
            }
        )
    return rows


def parse_logs(log_dir=None, out_file=None, model_name=None):
    """Summarise the PrepAgent and MDAgent logs into a CSV file.

    Reads ``PrepAgent.log`` and ``MDAgent.log`` from the log directory, splits
    each into runs at its "... initialized." marker, pairs the runs of both
    logs by position, and writes one CSV row per PrepAgent run. Log lines that
    appear before the first initialization marker are ignored.

    Args:
        log_dir: Directory containing the two log files. Defaults to
            ``LOG_DIR``.
        out_file: Path of the CSV file to write. Defaults to ``OUT_FILE``.
        model_name: Value for the "Model" column. Defaults to ``MODEL_NAME``.

    Raises:
        OSError: If a log file cannot be read or the CSV cannot be written.
    """
    log_dir = Path(LOG_DIR if log_dir is None else log_dir)
    out_file = OUT_FILE if out_file is None else out_file
    model_name = MODEL_NAME if model_name is None else model_name

    prep_runs = _parse_prep_runs((log_dir / "PrepAgent.log").read_text())
    md_runs = _parse_md_runs((log_dir / "MDAgent.log").read_text())
    rows = _build_rows(prep_runs, md_runs, model_name)

    with open(out_file, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=_SUMMARY_FIELDS)
        writer.writeheader()
        writer.writerows(rows)
    print(f"Summary written to {out_file}")
# Build the summary only when this file is executed as a script.
if __name__ == "__main__":
    parse_logs()