-
Notifications
You must be signed in to change notification settings - Fork 16
Expand file tree
/
Copy patheval.sh
More file actions
executable file
·49 lines (39 loc) · 1.22 KB
/
eval.sh
File metadata and controls
executable file
·49 lines (39 loc) · 1.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/bin/bash
# -----------------------------
# eval.sh - activate the conda environment and run evaluate.py per workload.
#
# Make executable:
#   chmod +x eval.sh
# Run it:
#   ./eval.sh
# -----------------------------

# -----------------------------
# Initialize conda in this shell
# -----------------------------
# Everything below depends on conda; stop early with a clear message instead
# of continuing with whatever python happens to be on PATH.
if ! command -v conda >/dev/null 2>&1; then
  echo "Error: conda not found on PATH." >&2
  exit 1
fi

# This line works for both Linux and macOS
# The hook output is captured first so a failing `conda shell.bash hook`
# is detected rather than silently evaluating an empty string.
conda_hook="$(conda shell.bash hook)" || {
  echo "Error: 'conda shell.bash hook' failed." >&2
  exit 1
}
eval "$conda_hook"

# -----------------------------
# Activate the correct environment
# -----------------------------
conda_env="smol311"
if ! conda activate "$conda_env"; then
  echo "Error: could not activate conda environment '$conda_env'." >&2
  exit 1
fi

# CONDA_DEFAULT_ENV is set by `conda activate`; reading it avoids a second
# conda invocation and parsing the `conda info --envs` table.
echo "Using conda environment: ${CONDA_DEFAULT_ENV:-$conda_env}"
echo ""
# -----------------------------
# Evaluation configuration
# -----------------------------
# Workloads to evaluate, one per line, in the order they are run.
datasets=(
  "astronomy"
  "archeology"
  "biomedical"
  "environment"
  "legal"
  "wildfire"
)

# Value handed to the evaluator's --sut option.
sut="SmolagentsPDTClaude37Sonnet"

# Evaluator command, stored as a single string ("program script").
cmd="python evaluate.py"

# Announce the run, followed by one blank line.
printf '%s\n\n' "Starting evaluations for SUT: $sut"
# -----------------------------
# Main loop
# -----------------------------
#######################################
# Runs the evaluator once per workload and reports each outcome.
# Globals:   cmd (read), sut (read), datasets (read)
# Arguments: none
# Outputs:   progress banners on stdout; failure details on stderr
# Returns:   0 if every workload succeeded, 1 if at least one failed
#######################################
run_all_workloads() {
  local -a runner=() failed=()
  local ds status

  # `cmd` is a plain string ("program arg ..."); split it into an argv array
  # once so the invocation below can be fully quoted.
  read -r -a runner <<<"$cmd"

  for ds in "${datasets[@]}"; do
    echo "============================================"
    echo "Running workload: $ds"
    echo "============================================"
    if "${runner[@]}" --sut "$sut" --workload "$ds" --use_truth_subset; then
      echo ""
      echo "Finished workload: $ds"
    else
      status=$?
      echo ""
      echo "Workload failed (exit status $status): $ds" >&2
      # Keep going so one bad workload does not block the others, but
      # remember it so the final status reflects the failure.
      failed+=("$ds")
    fi
    echo ""
  done

  if (( ${#failed[@]} > 0 )); then
    echo "Evaluations completed with ${#failed[@]} failed workload(s): ${failed[*]}" >&2
    return 1
  fi
  echo "All evaluations completed."
}

# Propagate failure to the caller (CI, wrapper scripts) via the exit status.
run_all_workloads || exit 1