[rollout] feat: add rollout config (volcengine#3010)

vermouth1992 · techkang · commit b80ae1649a21 · 2025-08-15T20:17:10.000+08:00
### What does this PR do? - Add a typed rollout config: a `RolloutConfig` dataclass (plus nested `SamplingConfig`, `MultiTurnConfig`, `AgentLoopConfig`, `CustomAsyncServerConfig` and `TraceConfig`) in `verl/workers/config/rollout.py`, matching `_target_` entries in the rollout YAML configs, and a `_target_` field on `BaseConfig` that replaces the former mutable `extra` field. ### Checklist Before Starting - [ ] Search for similar PRs. Paste at least one query link here: ... - [ ] Format the PR title as `[{modules}] {type}: {description}` (This will be checked by the CI) - `{modules}` include `fsdp`, `megatron`, `sglang`, `vllm`, `rollout`, `trainer`, `ci`, `training_utils`, `recipe`, `hardware`, `deployment`, `ray`, `worker`, `single_controller`, `misc`, `perf`, `model`, `algo`, `env`, `tool`, `ckpt`, `doc`, `data` - If this PR involves multiple modules, separate them with `,` like `[megatron, fsdp, doc]` - `{type}` is in `feat`, `fix`, `refactor`, `chore`, `test` - If this PR breaks any API (CLI arguments, config, function signature, etc.), add `[BREAKING]` to the beginning of the title. - Example: `[BREAKING][fsdp, megatron] feat: dynamic batching` ### Test > For changes that cannot be tested by CI (e.g., algorithm implementation, new model support), validate by experiment(s) and show results like training curve plots, evaluation results, etc. ### API and Usage Example > Demonstrate how the API changes if any, and provide usage example(s) if possible. ```python # Add code snippet or script demonstrating how to use this ``` ### Design & Code Changes > Demonstrate the high-level design if this PR is complex, and list the specific changes. ### Checklist Before Submitting > [!IMPORTANT] > Please check all the following items before requesting a review, otherwise the reviewer might deprioritize this PR for review. - [ ] Read the [Contribute Guide](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md). - [ ] Apply [pre-commit checks](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md#code-linting-and-formatting): `pre-commit install && pre-commit run --all-files --show-diff-on-failure --color=always` - [ ] Add / Update [the documentation](https://github.com/volcengine/verl/tree/main/docs). 
- [ ] Add unit or end-to-end test(s) to [the CI workflow](https://github.com/volcengine/verl/tree/main/.github/workflows) to cover all the code. If not feasible, explain why: ... - [ ] Once your PR is ready for CI, send a message in [the `ci-request` channel](https://verl-project.slack.com/archives/C091TCESWB1) in [the `verl` Slack workspace](https://join.slack.com/t/verl-project/shared_invite/zt-3855yhg8g-CTkqXu~hKojPCmo7k_yXTQ). (If not accessible, please try [the Feishu group (飞书群)](https://applink.larkoffice.com/client/chat/chatter/add_by_link?link_token=772jd4f1-cd91-441e-a820-498c6614126a).)
diff --git a/tests/utils/test_config_on_cpu.py b/tests/utils/test_config_on_cpu.py
@@ -17,19 +17,20 @@
 
 from omegaconf import OmegaConf
 
+from verl.base_config import BaseConfig
 from verl.utils import omega_conf_to_dataclass
 
 
 @dataclass
-class TestDataclass:
-    hidden_size: int
-    activation: str
+class TestDataclass(BaseConfig):
+    hidden_size: int = 0
+    activation: str = "relu"
 
 
 @dataclass
-class TestTrainConfig:
-    batch_size: int
-    model: TestDataclass
+class TestTrainConfig(BaseConfig):
+    batch_size: int = 0
+    model: TestDataclass = field(default_factory=TestDataclass)
     override_config: dict = field(default_factory=dict)
 
 
@@ -79,7 +80,7 @@ def test_command_with_override(self):
 
         # Run the command
         result = subprocess.run(
-            ["python3", "scripts/print_cfg.py", "+critic.profiler.extra.any_key=val"],
+            ["python3", "scripts/print_cfg.py"],
             capture_output=True,
             text=True,
         )
@@ -90,7 +91,6 @@ def test_command_with_override(self):
         # Verify the output contains expected config information
         self.assertIn("critic", result.stdout)
         self.assertIn("profiler", result.stdout)
-        self.assertIn("extra={'any_key': 'val'}", result.stdout)
 
 
 if __name__ == "__main__":
diff --git a/tests/utils/test_nvtx_profile.py b/tests/utils/test_nvtx_profile.py
@@ -56,7 +56,7 @@ def test_frozen_config(self):
         from verl.utils.profiler.config import ProfilerConfig
 
         # Create a new ProfilerConfig instance
-        config = ProfilerConfig(all_ranks=False, ranks=[0], extra={"key": "value"})
+        config = ProfilerConfig(all_ranks=False, ranks=[0])
 
         with self.assertRaises(FrozenInstanceError):
             config.all_ranks = True
@@ -70,10 +70,6 @@ def test_frozen_config(self):
         with self.assertRaises(TypeError):
             config["ranks"] = [1, 2, 3]
 
-        assert config["extra"]["key"] == "value"
-        config["extra"]["key"] = "value2"
-        assert config["extra"]["key"] == "value2"
-
 
 class TestNsightSystemsProfiler(unittest.TestCase):
     """Test suite for NsightSystemsProfiler functionality.
diff --git a/verl/base_config.py b/verl/base_config.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 import collections
-from dataclasses import FrozenInstanceError, dataclass, field, fields
+from dataclasses import FrozenInstanceError, dataclass, fields
 from typing import Any
 
 
@@ -27,8 +27,8 @@ class BaseConfig(collections.abc.Mapping):
     This allows instances of this class to be used like dictionaries.
     """
 
-    _mutable_fields = {"extra"}
-    extra: dict[str, Any] = field(default_factory=dict)
+    _mutable_fields = set()
+    _target_: str = ""
 
     def __setattr__(self, name: str, value):
         """Set the value of an attribute. Check if the attr is mutable before setting the value."""
diff --git a/verl/trainer/config/_generated_ppo_megatron_trainer.yaml b/verl/trainer/config/_generated_ppo_megatron_trainer.yaml
@@ -138,6 +138,7 @@ actor_rollout_ref:
       use_mbridge: ${oc.select:actor_rollout_ref.actor.megatron.use_mbridge,False}
     load_weight: true
   rollout:
+    _target_: verl.workers.config.RolloutConfig
     name: ???
     mode: sync
     temperature: 1.0
@@ -170,12 +171,14 @@ actor_rollout_ref:
       sglang:
         attention_backend: null
     val_kwargs:
+      _target_: verl.workers.config.SamplingConfig
       top_k: -1
       top_p: 1.0
       temperature: 0
       'n': 1
       do_sample: false
     multi_turn:
+      _target_: verl.workers.config.MultiTurnConfig
       enable: false
       max_assistant_turns: null
       tool_config_path: null
@@ -189,13 +192,16 @@ actor_rollout_ref:
       format: hermes
     calculate_log_probs: false
     agent:
+      _target_: verl.workers.config.AgentLoopConfig
       num_workers: 8
       agent_loop_config_path: null
       custom_async_server:
+        _target_: verl.workers.config.CustomAsyncServerConfig
         path: null
         name: null
     update_weights_bucket_megabytes: 512
     trace:
+      _target_: verl.workers.config.TraceConfig
       backend: null
       token2text: false
     skip_rollout: false
diff --git a/verl/trainer/config/_generated_ppo_trainer.yaml b/verl/trainer/config/_generated_ppo_trainer.yaml
@@ -113,6 +113,7 @@ actor_rollout_ref:
     entropy_from_logits_with_chunking: false
     entropy_checkpointing: false
   rollout:
+    _target_: verl.workers.config.RolloutConfig
     name: ???
     mode: sync
     temperature: 1.0
@@ -145,12 +146,14 @@ actor_rollout_ref:
       sglang:
         attention_backend: null
     val_kwargs:
+      _target_: verl.workers.config.SamplingConfig
       top_k: -1
       top_p: 1.0
       temperature: 0
       'n': 1
       do_sample: false
     multi_turn:
+      _target_: verl.workers.config.MultiTurnConfig
       enable: false
       max_assistant_turns: null
       tool_config_path: null
@@ -164,13 +167,16 @@ actor_rollout_ref:
       format: hermes
     calculate_log_probs: false
     agent:
+      _target_: verl.workers.config.AgentLoopConfig
       num_workers: 8
       agent_loop_config_path: null
       custom_async_server:
+        _target_: verl.workers.config.CustomAsyncServerConfig
         path: null
         name: null
     update_weights_bucket_megabytes: 512
     trace:
+      _target_: verl.workers.config.TraceConfig
       backend: null
       token2text: false
     skip_rollout: false
diff --git a/verl/trainer/config/generation.yaml b/verl/trainer/config/generation.yaml
@@ -14,6 +14,7 @@ model:
   path: ~/models/Qwen2-7B-Instruct
   external_lib: null
 rollout:
+  _target_: verl.workers.config.RolloutConfig
   name: vllm
   mode: sync # sync: LLM, async: AsyncLLM
   temperature: 1.0
diff --git a/verl/trainer/config/rollout/rollout.yaml b/verl/trainer/config/rollout/rollout.yaml
@@ -1,3 +1,6 @@
+# Target class for this configuration
+_target_: verl.workers.config.RolloutConfig
+
 # actor_rollout_ref.rollout.name: hf/vllm/sglang. The default value will be removed in the future
 name: ???
 
@@ -103,6 +106,9 @@ engine_kwargs:
 # Sampling parameters used during validation.
 val_kwargs:
 
+  # Required when using verl.utils.omega_conf_to_dataclass to instantiate dataclass configs
+  _target_: verl.workers.config.SamplingConfig
+
   # sampling parameters for validation
   # Top-k sampling parameter. -1 for vLLM rollout, 0 for HF rollout.
   top_k: -1
@@ -122,6 +128,9 @@ val_kwargs:
 # Multi-turn interaction config for tools or chat.
 multi_turn:
 
+  # Required when using verl.utils.omega_conf_to_dataclass to instantiate dataclass configs
+  _target_: verl.workers.config.MultiTurnConfig
+
   # set to True for multi-turn tool interaction tasks; should set rollout.name to sglang as well
   enable: False
 
@@ -170,6 +179,9 @@ calculate_log_probs: False
 # [Experimental] agent loop based rollout configs
 agent:
 
+  # Required when using verl.utils.omega_conf_to_dataclass to instantiate dataclass configs
+  _target_: verl.workers.config.AgentLoopConfig
+
   # Number of agent loop workers
   num_workers: 8
 
@@ -188,6 +200,9 @@ agent:
   # custom async server configs
   custom_async_server:
 
+    # Required when using verl.utils.omega_conf_to_dataclass to instantiate dataclass configs
+    _target_: verl.workers.config.CustomAsyncServerConfig
+
     # Path to the custom async server implementation
     path: null
 
@@ -211,6 +226,9 @@ update_weights_bucket_megabytes: 512
 # trace rollout data
 trace:
 
+  # Required when using verl.utils.omega_conf_to_dataclass to instantiate dataclass configs
+  _target_: verl.workers.config.TraceConfig
+
   # trace backend, support mlflow, weave
   backend: null
 
diff --git a/verl/utils/config.py b/verl/utils/config.py
@@ -53,8 +53,10 @@ def omega_conf_to_dataclass(config: DictConfig | dict, dataclass_type: Optional[
         raise ValueError(f"{dataclass_type} must be a dataclass")
     cfg = OmegaConf.create(config)  # in case it's a dict
     # pop _target_ to avoid hydra instantiate error, as most dataclass do not have _target_
-    if "_target_" in cfg:
-        cfg.pop("_target_")
+    # Updated (vermouth1992): `_target_` is now a field on BaseConfig, so the key no longer has to be removed.
+    # Keeping it lets this code path support recursive instantiation; the old pop is left disabled below.
+    # if "_target_" in cfg:
+    #     cfg.pop("_target_")
     cfg_from_dataclass = OmegaConf.structured(dataclass_type)
     # let cfg override the existing vals in `cfg_from_dataclass`
     cfg_merged = OmegaConf.merge(cfg_from_dataclass, cfg)
diff --git a/verl/workers/config/__init__.py b/verl/workers/config/__init__.py
@@ -16,6 +16,7 @@
 from .actor import *  # noqa
 from .engine import *  # noqa
 from .optimizer import *  # noqa
-from . import actor, critic, engine, optimizer
+from .rollout import *  # noqa
+from . import actor, critic, engine, optimizer, rollout
 
-__all__ = actor.__all__ + critic.__all__ + engine.__all__ + optimizer.__all__
+__all__ = actor.__all__ + critic.__all__ + engine.__all__ + optimizer.__all__ + rollout.__all__
diff --git a/verl/workers/config/rollout.py b/verl/workers/config/rollout.py
@@ -0,0 +1,141 @@
+# Copyright 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from dataclasses import dataclass, field
+from typing import Optional
+
+from verl.base_config import BaseConfig
+from verl.utils.profiler import ProfilerConfig
+
+__all__ = [
+    "SamplingConfig",
+    "MultiTurnConfig",
+    "CustomAsyncServerConfig",
+    "AgentLoopConfig",
+    "TraceConfig",
+    "RolloutConfig",
+]
+
+
+@dataclass
+class SamplingConfig(BaseConfig):  # sampling parameters; used as the type of RolloutConfig.val_kwargs
+    temperature: float = 1.0
+    top_k: int = -1  # rollout.yaml notes: -1 for vLLM rollout, 0 for HF rollout
+    top_p: float = 1.0
+    do_sample: bool = True
+    n: int = 1
+
+
+@dataclass
+class MultiTurnConfig(BaseConfig):  # multi-turn interaction settings for tools or chat
+    _mutable_fields = {"max_assistant_turns", "max_user_turns"}  # presumably stay assignable after init (TODO confirm)
+
+    enable: bool = False  # rollout.yaml notes: set True for multi-turn tool tasks; rollout.name should be sglang too
+    max_assistant_turns: Optional[int] = None
+    tool_config_path: Optional[str] = None
+    max_user_turns: Optional[int] = None
+    max_parallel_calls: int = 1
+    max_tool_response_length: int = 256
+    tool_response_truncate_side: str = "middle"
+    interaction_config_path: Optional[str] = None
+    use_inference_chat_template: bool = False
+    tokenization_sanity_check_mode: str = "strict"
+    format: str = "hermes"
+
+
+@dataclass
+class CustomAsyncServerConfig(BaseConfig):  # custom async server settings; nested under AgentLoopConfig
+    path: Optional[str] = None  # path to the custom async server implementation
+    name: Optional[str] = None
+
+
+@dataclass
+class AgentLoopConfig(BaseConfig):  # [Experimental] agent-loop based rollout settings
+    num_workers: int = 8  # number of agent loop workers
+    agent_loop_config_path: Optional[str] = None
+    custom_async_server: CustomAsyncServerConfig = field(default_factory=CustomAsyncServerConfig)
+
+
+@dataclass
+class TraceConfig(BaseConfig):  # settings for tracing rollout data
+    backend: Optional[str] = None  # trace backend; rollout.yaml notes mlflow and weave are supported
+    token2text: bool = False
+
+
+@dataclass
+class RolloutConfig(BaseConfig):  # top-level rollout settings; the rollout YAML `_target_` points at this class
+    _mutable_fields = {"max_model_len"}  # presumably stays assignable after init (TODO confirm)
+
+    name: Optional[str] = None  # rollout.yaml notes: hf/vllm/sglang; the YAML value is `???`
+    mode: str = "sync"  # generation.yaml notes: sync -> LLM, async -> AsyncLLM
+
+    temperature: float = 1.0
+    top_k: int = -1
+    top_p: float = 1.0
+    do_sample: bool = True
+    n: int = 1
+
+    prompt_length: int = 512
+    response_length: int = 512
+
+    dtype: str = "bfloat16"
+    gpu_memory_utilization: float = 0.5
+    ignore_eos: bool = False
+    enforce_eager: bool = True
+    cudagraph_capture_sizes: Optional[list] = None
+    free_cache_engine: bool = True
+    tensor_model_parallel_size: int = 2
+    max_num_batched_tokens: int = 8192
+
+    # TODO: enable train_kwargs
+    # train_sampling_config: SamplingConfig = field(default_factory=SamplingConfig)
+
+    val_kwargs: SamplingConfig = field(default_factory=SamplingConfig)  # sampling parameters used during validation
+
+    max_model_len: Optional[int] = None
+    max_num_seqs: int = 1024
+
+    # note that the logprob computation should belong to the actor
+    log_prob_micro_batch_size: Optional[int] = None
+    log_prob_micro_batch_size_per_gpu: Optional[int] = None
+    log_prob_use_dynamic_bsz: bool = False
+    log_prob_max_token_len_per_gpu: int = 16384
+
+    disable_log_stats: bool = True
+
+    multi_stage_wake_up: bool = False
+    engine_kwargs: dict = field(default_factory=dict)
+
+    calculate_log_probs: bool = False
+
+    agent: AgentLoopConfig = field(default_factory=AgentLoopConfig)  # [Experimental] agent loop based rollout configs
+
+    trace: TraceConfig = field(default_factory=TraceConfig)  # trace rollout data
+
+    multi_turn: MultiTurnConfig = field(default_factory=MultiTurnConfig)  # multi-turn config for tools or chat
+
+    update_weights_bucket_megabytes: int = 512
+
+    skip_rollout: bool = False
+
+    skip_dump_dir: str = "/tmp/rollout_dump"
+
+    profiler: ProfilerConfig = field(default_factory=ProfilerConfig)
+
+    enable_chunked_prefill: bool = True
+    load_format: str = "dummy_dtensor"
+
+    layered_summon: bool = False
+
+    layer_name_map: dict = field(default_factory=dict)
diff --git a/verl/workers/fsdp_workers.py b/verl/workers/fsdp_workers.py
diff --git a/verl/workers/megatron_workers.py b/verl/workers/megatron_workers.py
diff --git a/verl/workers/rollout/sglang_rollout/sglang_rollout.py b/verl/workers/rollout/sglang_rollout/sglang_rollout.py
diff --git a/verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py b/verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py