@@ -89,8 +89,6 @@ def sample_sharegpt_requests(
     tokenizer: PreTrainedTokenizerBase,
     fixed_output_len: Optional[int] = None,
 ) -> List[Tuple[str, int, int, None]]:
-    if fixed_output_len is not None and fixed_output_len < 4:
-        raise ValueError("output_len too small")
     # Load the dataset.
     with open(dataset_path) as f:
         dataset = json.load(f)
@@ -117,7 +115,7 @@ def sample_sharegpt_requests(
         prompt_len = len(prompt_token_ids)
         output_len = len(completion_token_ids
                          ) if fixed_output_len is None else fixed_output_len
-        if prompt_len < 4 or output_len < 4:
+        if prompt_len < 4 or (fixed_output_len is None and output_len < 4):
             # Prune too short sequences.
             continue
         if prompt_len > 1024 or prompt_len + output_len > 2048:
@@ -228,10 +226,11 @@ def sample_hf_requests(
         prompt_len = len(prompt_token_ids)
         output_len = len(completion_token_ids
                          ) if fixed_output_len is None else fixed_output_len
-        if prompt_len < 4 or output_len < 4:
+        if fixed_output_len is None and (prompt_len < 4 or output_len < 4):
             # Prune too short sequences.
             continue
-        if prompt_len > 1024 or prompt_len + output_len > 2048:
+        if fixed_output_len is None and \
+            (prompt_len > 1024 or prompt_len + output_len > 2048):
             # Prune too long sequences.
             continue
 
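Net effect: when a caller supplies fixed_output_len, the measured completion length no longer triggers the short-sequence prune in sample_sharegpt_requests, and sample_hf_requests additionally skips the long-sequence prune. A minimal standalone sketch of the sample_sharegpt_requests predicate after this change (the helper name should_prune_sharegpt and its signature are illustrative only; the thresholds 4, 1024, and 2048 come from the diff above):

from typing import Optional


def should_prune_sharegpt(prompt_len: int,
                          output_len: int,
                          fixed_output_len: Optional[int] = None) -> bool:
    """Return True if the (prompt, completion) pair should be skipped."""
    # Short-sequence prune: the output length only counts when it was
    # measured from the dataset, i.e. fixed_output_len is None.
    if prompt_len < 4 or (fixed_output_len is None and output_len < 4):
        return True
    # Long-sequence prune is unchanged in sample_sharegpt_requests.
    if prompt_len > 1024 or prompt_len + output_len > 2048:
        return True
    return False


# A 2-token output requested via fixed_output_len=2 is now kept,
# whereas a 2-token completion measured from the dataset is still pruned.
assert should_prune_sharegpt(100, 2, fixed_output_len=2) is False
assert should_prune_sharegpt(100, 2, fixed_output_len=None) is True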