@@ -176,7 +176,7 @@ def generate(self, data: DataProto, actor_cluster: Union[Any, Cluster], pipeline
 
     def get_available_dp_rank(self):
         while True:
-            # 负载均衡逻辑,期望各dp正在处理的条数基本接近
+            # Load balancing: keep the number of in-flight items roughly equal across dp ranks
             sorted_ranks = sorted(
                 self.load_balance_coordinator.keys(), key=lambda rank: (self.load_balance_coordinator[rank], rank)
             )
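The selection rule the new comment describes can be shown in isolation: always pick the rank with the fewest in-flight requests, breaking ties by rank id so the choice is deterministic. A minimal sketch (the dict layout and the capacity cutoff are assumptions, not the repo's exact fields):

```python
# Hypothetical stand-in for self.load_balance_coordinator: dp_rank -> in-flight count.
load = {0: 3, 1: 1, 2: 1, 3: 5}

def pick_dp_rank(load, max_running_requests=4):
    # Sort by (in-flight count, rank): least-loaded first, ties broken by lowest rank.
    for rank in sorted(load, key=lambda r: (load[r], r)):
        if load[rank] < max_running_requests:
            return rank
    return None  # all workers saturated; the real loop would wait and retry

rank = pick_dp_rank(load)
assert rank == 1   # ranks 1 and 2 tie on load; rank id breaks the tie
load[rank] += 1    # account for the request about to be sent
```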
@@ -210,26 +210,26 @@ def generate_opt_level_1(self, data: DataProto):
         )
         self.cluster.start_server(data=DataProto(meta_info=data.meta_info), blocking=True)
 
-        # 分发数据至收到target rollout完成
-        # 无限循环,把所有的response发送给dp worker
+        # Distribute data until the target number of rollouts has been received
+        # Loop indefinitely, dispatching every request to the dp workers
         send_request_count = 0
         request_refs = []
         data_index_counter = itertools.count()
         last_alive_check = time.time()
         while not self.is_completed:
 
-            # 探测dp worker是否存活, dp worker的server thread可能由于异常退出,造成hang
+            # Probe whether the dp workers are alive; a dp worker's server thread may exit on an exception and cause a hang
             current_time = time.time()
             if current_time - last_alive_check >= self.alive_check_interval:
                 self.cluster.add_request(command=GenerateRequestType.ALIVE_CHECK, data=DataProto())
                 last_alive_check = current_time
 
             if send_request_count < data.batch.batch_size[0]:
-                # 取一个可以发送request的dp worker
+                # Pick a dp worker that can accept a new request
                 dp_rank = next(self.get_available_dp_rank())
 
-                # 还有数据需要发送, 取需要发送的数据
-                # request_id全局递增,否则vllm/sglang scheduler状态不对
+                # There is still data to send; fetch the next item
+                # request_id must increase globally, otherwise the vllm/sglang scheduler state becomes inconsistent
                 request_id = next(self.request_counter)
                 data_index = next(data_index_counter)
                 request_data = collate_fn([self.data[data_index]])
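The two invariants in this loop are worth making concrete: request ids come from one global counter that is never reset between batches (vllm/sglang schedulers assume ids only grow), and liveness probes are throttled to at most one per interval. A hedged skeleton (names and the 10s interval are assumptions):

```python
import itertools
import time

request_counter = itertools.count()   # global, never reset between batches
alive_check_interval = 10.0           # assumed value
last_alive_check = time.time()

def dispatch(num_requests, send_fn, probe_fn):
    global last_alive_check
    for _ in range(num_requests):
        now = time.time()
        if now - last_alive_check >= alive_check_interval:
            probe_fn()                    # stand-in for the ALIVE_CHECK add_request
            last_alive_check = now
        send_fn(next(request_counter))    # globally unique, monotonically increasing id

dispatch(3, send_fn=print, probe_fn=lambda: print("probe dp workers"))
```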
@@ -240,7 +240,7 @@ def generate_opt_level_1(self, data: DataProto):
                 ].item()
                 self.request_id_2_dp_rank[request_data.meta_info["request_id"]] = dp_rank
                 self.prompt_id_2_request_ids[prompt_id].add(request_data.meta_info["request_id"])
-                # 需要注意上面的调用顺序, report_response中会更新request_id索引dp_rank,所以这里需要最后add request_id
+                # Mind the call order above: report_response updates the request_id -> dp_rank index, so the request_id must be added last
                 request_data.meta_info["response_callback_fn"] = self.response_callback_fn
                 request_data.meta_info["generation_config"] = data.meta_info["generation_config"]
                 request_refs.append(
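The ordering constraint in that comment is a classic publish-before-expose pattern: a concurrent report_response discovers request ids through prompt_id_2_request_ids and then dereferences request_id_2_dp_rank, so the rank mapping must exist before the id becomes visible. An illustrative reduction:

```python
from collections import defaultdict

request_id_2_dp_rank = {}
prompt_id_2_request_ids = defaultdict(set)

def register(prompt_id, request_id, dp_rank):
    request_id_2_dp_rank[request_id] = dp_rank           # 1) publish the mapping first
    prompt_id_2_request_ids[prompt_id].add(request_id)   # 2) then expose the id
    # Swapping these two lines opens a window where a concurrent reader
    # sees a request_id whose dp_rank lookup raises KeyError.
```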
@@ -257,7 +257,7 @@ def generate_opt_level_1(self, data: DataProto):
             request_refs = []
 
         gen_metrics = self.cluster.stop_server()
-        # generate结束时,应该收到num_return_sequences + drop_generation_num条返回结果
+        # When generation ends, num_return_sequences + drop_generation_num results should have been received
         generate_return_num = num_return_sequences + self.pipeline_config.drop_generation_num
         response_ids_list_of_list = []
         eos_token_id = None
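Toy bookkeeping for that completion rule, with assumed values: a prompt counts as done once num_return_sequences + drop_generation_num responses have arrived for it.

```python
from collections import Counter

num_return_sequences, drop_generation_num = 8, 2  # assumed values
generate_return_num = num_return_sequences + drop_generation_num

received = Counter()

def on_response(prompt_id):
    received[prompt_id] += 1
    return received[prompt_id] >= generate_return_num  # True once the prompt is complete
```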
@@ -401,7 +401,7 @@ def set_scheduler(
         state: Dict[str, Any] = None,
     ):
         """
-        GenerateScheduler可以由多个实例,不再局限于单例
+        A GenerateScheduler may have multiple instances; it is no longer restricted to being a singleton
         """
         self.actor_cluster = actor_cluster
         self.reward_clusters = reward_clusters
@@ -466,9 +466,9 @@ def reset_status(self):
 
     def get_batch(self, data: DataProto, batch_size: int) -> DataProto:
         """
-        从dataset里,按给定策略sample batch
-        1. 常规无过滤
-        2. 动态过滤
+        Sample a batch from the dataset with the given strategy:
+        1. Regular sampling, no filtering
+        2. Dynamic filtering
         """
         self.batch_size = batch_size
         self.reset_status()
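The two strategies in the docstring differ only in whether sampled queries are screened before entering the batch; regular sampling is the same loop with an always-true predicate. A hedged sketch (query_filter_fn is a stand-in for the repo's filter):

```python
import random

def get_batch(dataset, batch_size, query_filter_fn=lambda item: True):
    collected, filter_count = [], 0
    order = random.sample(range(len(dataset)), len(dataset))
    for idx in order:
        if len(collected) == batch_size:
            break
        if query_filter_fn(dataset[idx]):
            collected.append(dataset[idx])
        else:
            filter_count += 1
    return collected, filter_count  # may return < batch_size if the pool runs dry
```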
@@ -529,7 +529,7 @@ def get_batch(self, data: DataProto, batch_size: int) -> DataProto:
             f"used queries: {query_use_count} query_filter_count: {self.query_filter_count} "
             f"response_filter_count: {self.response_filter_count} "
         )
-        # TODO: 这里len(collect_data) > rollout_batch_size, 可以尝试动态扩大batch_size
+        # TODO: here len(collect_data) > rollout_batch_size; we could try growing batch_size dynamically
         batch = DataProto.concat(collect_data[: self.batch_size * num_return_sequences])
         batch.meta_info["metrics"] = {
             f"scheduler/query_filter_count": self.query_filter_count,
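Toy arithmetic for the truncation and the TODO, with assumed numbers: 70 collected responses against batch_size * num_return_sequences = 64 means today's slice drops the surplus 6, while the TODO proposes widening batch_size to keep them.

```python
collect_data = list(range(70))            # assumed: 70 responses survived filtering
batch_size, num_return_sequences = 8, 8   # assumed values
kept = collect_data[: batch_size * num_return_sequences]
assert len(kept) == 64 and len(collect_data) - len(kept) == 6
```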
@@ -538,7 +538,7 @@ def get_batch(self, data: DataProto, batch_size: int) -> DataProto:
             f"scheduler/query_use_count": query_use_count,
         }
 
-        # 统计全部response metrics
+        # Aggregate metrics over all responses
         metrics = {}
         for domain, response_batches in self.response_cache.items():
             response_batch = DataProto.concat(response_batches[:])
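The shape of that aggregation, made concrete with an assumed cache layout (domain mapped to a list of scored responses; the repo's DataProto fields will differ):

```python
from statistics import mean

response_cache = {
    "math": [{"reward": 1.0}, {"reward": 0.0}],  # assumed record shape
    "code": [{"reward": 0.5}],
}
metrics = {
    f"scheduler/{domain}/reward_mean": mean(r["reward"] for r in responses)
    for domain, responses in response_cache.items()
}
```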
@@ -555,8 +555,8 @@ def get_batch(self, data: DataProto, batch_size: int) -> DataProto:
     @ray.method(concurrency_group="multi_thread")
     def report_response(self, data: DataProto):
         """
-        这里需要考虑多线程数据访问
-        data返回可能有多条的
+        Multi-threaded data access must be considered here
+        The returned data may contain multiple entries
         """
         try:
             request_id = data.meta_info["request_id"]
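A minimal sketch of the threading pattern the decorator implies: methods in the "multi_thread" concurrency group can run in parallel, so shared dictionaries need a lock. The group size of 4 is an assumption:

```python
import threading
import ray

@ray.remote(concurrency_groups={"multi_thread": 4})
class Scheduler:
    def __init__(self):
        self._lock = threading.Lock()
        self.request_id_2_dp_rank = {}

    @ray.method(concurrency_group="multi_thread")
    def report_response(self, request_id):
        with self._lock:  # guard every read-modify-write of shared state
            return self.request_id_2_dp_rank.pop(request_id, None)
```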
@@ -577,15 +577,15 @@ def report_response(self, data: DataProto):
             return
 
         # call reward
-        # reward worker得能支持单条数据计算, dynamic sampling对需要batch计算reward的需要注意...
-        # 多域的时候, llm as judge, 需要单独为reward worker分配gpu
+        # The reward worker must be able to score a single sample; with dynamic sampling, rewards that can only be computed per batch need extra care...
+        # In the multi-domain case (llm as judge), the reward worker needs its own gpu allocation
         rewards: DataProto = ray.get(reward_worker.compute_rewards.remote(batch))
         batch.union(rewards)
 
         response_buffers: List[DataProto] = []
         batch_expanded = [batch[[idx]] for idx in range(output_count)]
 
-        # response_filter, 不太需要response filter
+        # response_filter; a response filter is rarely needed here
         for batch_item in batch_expanded:
             if self.response_filter_fn(batch_item, self.pipeline_config):
                 response_buffers.append(batch_item)
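A sketch mirroring the call pattern above: each callback carries a single request's batch, so compute_rewards must accept one sample at a time. The class body and return shape are assumptions, not the repo's actual reward worker:

```python
import ray

@ray.remote
class RewardWorker:
    def compute_rewards(self, batch):
        # Score one sample; reward models that only work on full batches would
        # need buffering here, which is what the comment above warns about.
        return {"reward": 1.0}

# usage:
#   reward_worker = RewardWorker.remote()
#   rewards = ray.get(reward_worker.compute_rewards.remote(batch))
```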
@@ -713,7 +713,7 @@ def expand_requests(self, data: DataProto):
         return target_requests
 
     def check_worker_alive(self, cluster):
-        # 探测dp worker是否存活, dp worker的server thread可能由于异常退出,造成hang
+        # Probe whether the dp workers are alive; a dp worker's server thread may exit on an exception and cause a hang
         current_time = time.time()
         if current_time - self.last_alive_check >= self.alive_check_interval:
             cluster.add_request(command=GenerateRequestType.ALIVE_CHECK, data=DataProto())
@@ -734,7 +734,7 @@ def check_send_new_request(self) -> bool:
 
     def get_available_dp_rank(self):
         while True:
-            # 负载均衡逻辑,期望各dp正在处理的条数基本接近
+            # Load balancing: keep the number of in-flight items roughly equal across dp ranks
             sorted_ranks = sorted(
                 self.load_balance_coordinator.keys(), key=lambda rank: (self.load_balance_coordinator[rank], rank)
             )