 The standard boilerplate for invoking the main function when the script is executed.
 """
 import importlib
+import os.path

 from typing import Annotated, Optional

 from langchain.globals import set_llm_cache

 from gpt_engineer.applications.cli.main import load_env_if_needed
+from gpt_engineer.benchmark.bench_config import BenchConfig
 from gpt_engineer.benchmark.benchmarks.load import get_benchmark
 from gpt_engineer.benchmark.run import print_results, run

@@ -69,12 +71,9 @@ def main(
             help="python file that contains a function called 'default_config_agent'"
         ),
     ],
-    benchmarks: Annotated[
-        str, typer.Argument(help="benchmark name(s) separated by ','")
-    ],
-    task_name: Annotated[
+    bench_config: Annotated[
         Optional[str], typer.Argument(help="optional task name in benchmark")
-    ] = None,
+    ] = os.path.join(os.path.dirname(__file__), "default_bench_config.toml"),
     verbose: Annotated[
         bool, typer.Option(help="print results for each task", show_default=False)
     ] = False,
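
The hunk above replaces two positional arguments (benchmarks and task_name) with a single optional bench_config path that defaults to a default_bench_config.toml shipped next to the module. A minimal standalone sketch of the same Typer pattern follows; the file name sketch.py and the help text are illustrative, not part of the commit:

# sketch.py - mirrors the Annotated optional-argument pattern used above.
import os.path
from typing import Annotated, Optional

import typer

app = typer.Typer()


@app.command()
def main(
    bench_config: Annotated[
        Optional[str], typer.Argument(help="path to a benchmark config TOML")
    ] = os.path.join(os.path.dirname(__file__), "default_bench_config.toml"),
):
    # Because a default is supplied, Typer makes the positional argument optional:
    # "python sketch.py" uses the bundled file, "python sketch.py my.toml" overrides it.
    print("using config file: " + bench_config)


if __name__ == "__main__":
    app()
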
@@ -88,8 +87,8 @@ def main(
         The file path to the Python module that contains a function called 'default_config_agent'.
     benchmarks : str
         A comma-separated string of benchmark names to run.
-    task_name : Optional[str], default=None
-        An optional task name to run within the benchmark.
+    bench_config : Optional[str], default=default_bench_config.toml
+        Configuration file for choosing which benchmark problems to run. See default config for more details.
     verbose : bool, default=False
         A flag to indicate whether to print results for each task.

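The new docstring entry points the reader at the default config for details. The actual default_bench_config.toml ships with gpt-engineer; the TOML below is only a guessed shape, inferred from the selection loop in the next hunk (section names are illustrative assumptions; only the active key is taken from the commit):

# config_shape.py - hypothetical config shape; tomllib requires Python 3.11+.
import tomllib

SAMPLE_TOML = """
[gptme]
active = true

[apps]
active = false
"""

config = tomllib.loads(SAMPLE_TOML)

# Keep only sections whose active flag is set, as the loop in the next hunk
# does via attribute access on the parsed BenchConfig object.
active = [name for name, section in config.items() if section.get("active")]
print(active)  # -> ['gptme']
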
@@ -99,13 +98,27 @@ def main(
     """
     set_llm_cache(SQLiteCache(database_path=".langchain.db"))
     load_env_if_needed()
+    config = BenchConfig.from_toml(bench_config)
+    print("using config file: " + bench_config)
+    benchmarks = list()
+    for specific_config_name in vars(config):
+        specific_config = getattr(config, specific_config_name)
+        if hasattr(specific_config, "active"):
+            if specific_config.active:
+                benchmarks.append(specific_config_name)

-    benchmarks = benchmarks.split(",")
     for benchmark_name in benchmarks:
-        benchmark = get_benchmark(benchmark_name)
+        benchmark = get_benchmark(benchmark_name, config)
+        if len(benchmark.tasks) == 0:
+            print(
+                benchmark_name
+                + " was skipped, since no tasks are specified. Increase the number of tasks in the config file at: "
+                + bench_config
+            )
+            continue
         agent = get_agent(path_to_agent)

-        results = run(agent, benchmark, task_name, verbose=verbose)
+        results = run(agent, benchmark, verbose=verbose)
         print(
             f"\n--- Results for agent {path_to_agent}, benchmark: {benchmark_name} ---"
         )
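
The selection loop above discovers benchmarks by reflection instead of parsing a comma-separated argument: every attribute of the parsed config that carries an active flag is treated as a benchmark section, and the hasattr() guard skips plain fields. A self-contained sketch with stand-in dataclasses (FakeBenchConfig and its field names are hypothetical):

from dataclasses import dataclass, field


@dataclass
class SubConfig:
    active: bool = True


@dataclass
class FakeBenchConfig:
    gptme: SubConfig = field(default_factory=SubConfig)
    apps: SubConfig = field(default_factory=lambda: SubConfig(active=False))
    note: str = "plain attributes without an active flag are ignored"


config = FakeBenchConfig()
benchmarks = [
    name
    for name in vars(config)  # instance attribute names, in declaration order
    if hasattr(getattr(config, name), "active") and getattr(config, name).active
]
print(benchmarks)  # -> ['gptme']

As the hunk also shows, an active benchmark whose config yields zero tasks is skipped with a warning rather than aborting the run.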