@@ -560,7 +560,6 @@ async def benchmark(
560560 tokenizer : PreTrainedTokenizerBase ,
561561 input_requests : list [tuple [str , int , int ]],
562562 logprobs : Optional [int ],
563- best_of : int ,
564563 request_rate : float ,
565564 burstiness : float ,
566565 disable_tqdm : bool ,
@@ -592,7 +591,6 @@ async def benchmark(
592591 prompt_len = test_prompt_len ,
593592 output_len = test_output_len ,
594593 logprobs = logprobs ,
595- best_of = best_of ,
596594 multi_modal_content = test_mm_content ,
597595 ignore_eos = ignore_eos ,
598596 )
@@ -619,7 +617,6 @@ async def benchmark(
619617 prompt_len = test_prompt_len ,
620618 output_len = test_output_len ,
621619 logprobs = logprobs ,
622- best_of = best_of ,
623620 multi_modal_content = test_mm_content ,
624621 ignore_eos = ignore_eos )
625622 profile_output = await request_func (request_func_input = profile_input )
@@ -668,7 +665,6 @@ async def limited_request_func(request_func_input, pbar):
668665 prompt_len = prompt_len ,
669666 output_len = output_len ,
670667 logprobs = logprobs ,
671- best_of = best_of ,
672668 multi_modal_content = mm_content ,
673669 ignore_eos = ignore_eos )
674670 tasks .append (
@@ -686,7 +682,6 @@ async def limited_request_func(request_func_input, pbar):
686682 prompt_len = test_prompt_len ,
687683 output_len = test_output_len ,
688684 logprobs = logprobs ,
689- best_of = best_of ,
690685 )
691686 profile_output = await request_func (request_func_input = profile_input )
692687 if profile_output .success :
@@ -958,7 +953,6 @@ def main(args: argparse.Namespace):
958953 tokenizer = tokenizer ,
959954 input_requests = input_requests ,
960955 logprobs = args .logprobs ,
961- best_of = args .best_of ,
962956 request_rate = args .request_rate ,
963957 burstiness = args .burstiness ,
964958 disable_tqdm = args .disable_tqdm ,
@@ -983,7 +977,6 @@ def main(args: argparse.Namespace):
983977 result_json ["backend" ] = backend
984978 result_json ["model_id" ] = model_id
985979 result_json ["tokenizer_id" ] = tokenizer_id
986- result_json ["best_of" ] = args .best_of
987980 result_json ["num_prompts" ] = args .num_prompts
988981
989982 # Metadata
@@ -1081,13 +1074,6 @@ def main(args: argparse.Namespace):
10811074 help =
10821075 "Name or path of the tokenizer, if not using the default tokenizer." , # noqa: E501
10831076 )
1084- parser .add_argument (
1085- "--best-of" ,
1086- type = int ,
1087- default = 1 ,
1088- help = "Generates `best_of` sequences per prompt and "
1089- "returns the best one." ,
1090- )
10911077 parser .add_argument ("--use-beam-search" , action = "store_true" )
10921078 parser .add_argument (
10931079 "--num-prompts" ,
0 commit comments