volcengine · vermouth1992 · Sep 5, 2025 · Sep 2, 2025 · Sep 2, 2025 · Sep 2, 2025
@@ -208,6 +208,7 @@ def __init__(self, model_path: str, config: RolloutConfig, tokenizer, model_hf_c
             n=1,
             logprobs=0,  # can be set to 0 and let actor to recompute
             max_tokens=config.response_length,
+            repetition_penalty=config.get("repetition_penalty", 1.0),
         )
 
         kwargs["detokenize"] = False