Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 16 additions & 9 deletions benchmark/mmmu/bench_sglang.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ async def eval_mmmu(args) -> None:
client = openai.AsyncOpenAI(
api_key="sk", base_url=f"http://127.0.0.1:{args.port}/v1"
)
semaphore = asyncio.Semaphore(args.concurrency)
start = time.perf_counter()
base_url = f"http://127.0.0.1:{args.port}"

Expand All @@ -132,14 +131,22 @@ async def eval_mmmu(args) -> None:

samples = samples[: args.profile_number]

tasks = [
process_sample_with_semaphore(semaphore, client, sample, sampling_params)
for sample in samples
]

for coro in tqdm(asyncio.as_completed(tasks), total=len(tasks)):
sample, response = await coro
process_result(response, sample, answer_dict, out_samples)
if args.concurrency == 1:
# When concurrency == 1, process samples sequentially so that requests are
# issued in a consistent order; this is mainly useful for profiling.
for sample in tqdm(samples):
_, response = await process_sample(client, sample, sampling_params)
process_result(response, sample, answer_dict, out_samples)
else:
semaphore = asyncio.Semaphore(args.concurrency)
tasks = [
process_sample_with_semaphore(semaphore, client, sample, sampling_params)
for sample in samples
]

for coro in tqdm(asyncio.as_completed(tasks), total=len(tasks)):
sample, response = await coro
process_result(response, sample, answer_dict, out_samples)

if args.profile:
print("Stopping profiler...")
Expand Down
4 changes: 3 additions & 1 deletion benchmark/mmmu/eval_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class EvalArgs:
seed: int = 42
split: str = "validation"
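# Image pixel limit. The earlier default (4300000) was chosen to make the benchmark available on A100 for most 7B models; NOTE(review): -1 presumably disables the limit — confirm.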
image_pixels_limit: int = 4300000
image_pixels_limit: int = -1
result_filename: str = ""
prompt_format_file: str = "prompt_format.yaml"
dataset_path: str = "MMMU/MMMU"
Expand Down Expand Up @@ -193,6 +193,8 @@ def process_sample(i, sample):
elif sample:
samples.append(sample)

samples.sort(key=lambda x: x["final_input_prompt"])

print(
f"Skipping {skip_count} samples with large images, {round((float(skip_count) / len(dataset)) * 100, 2)}% of dataset"
)
Expand Down
Loading