volcengine
diff --git a/‎.github/workflows/e2e_ppo_trainer_megatron_sglang.yml‎
Lines changed: 1 addition & 0 deletions b/‎.github/workflows/e2e_ppo_trainer_megatron_sglang.yml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.github/workflows/sgl.yml‎
Lines changed: 3 additions & 3 deletions b/‎.github/workflows/sgl.yml‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎recipe/one_step_off_policy/fsdp_workers.py‎
Lines changed: 6 additions & 12 deletions b/‎recipe/one_step_off_policy/fsdp_workers.py‎
Lines changed: 6 additions & 12 deletions
diff --git a/‎recipe/one_step_off_policy/megatron_workers.py‎
Lines changed: 16 additions & 15 deletions b/‎recipe/one_step_off_policy/megatron_workers.py‎
Lines changed: 16 additions & 15 deletions
diff --git a/‎recipe/sppo/sppo_worker.py‎
Lines changed: 1 addition & 3 deletions b/‎recipe/sppo/sppo_worker.py‎
Lines changed: 1 addition & 3 deletions
diff --git a/‎tests/workers/rollout/rollout_vllm/test_vllm_model_rope_scaling.py‎
Lines changed: 12 additions & 9 deletions b/‎tests/workers/rollout/rollout_vllm/test_vllm_model_rope_scaling.py‎
Lines changed: 12 additions & 9 deletions
diff --git a/‎tests/workers/rollout/test_sglang_async_rollout_mcp_tools.py‎
Lines changed: 11 additions & 8 deletions b/‎tests/workers/rollout/test_sglang_async_rollout_mcp_tools.py‎
Lines changed: 11 additions & 8 deletions
diff --git a/‎tests/workers/rollout/test_sglang_async_rollout_search_tools.py‎
Lines changed: 21 additions & 16 deletions b/‎tests/workers/rollout/test_sglang_async_rollout_search_tools.py‎
Lines changed: 21 additions & 16 deletions
diff --git a/‎tests/workers/rollout/test_sglang_async_rollout_sf_tools.py‎
Lines changed: 11 additions & 8 deletions b/‎tests/workers/rollout/test_sglang_async_rollout_sf_tools.py‎
Lines changed: 11 additions & 8 deletions
@@ -284,6 +284,7 @@ jobs:
       - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (DeepSeek)
         run: |
           ray stop --force
+          MEGATRON_CI_DISABLE_EXPANDABLE_SEGMENTS=1 \
           ADV_ESTIMATOR=grpo USE_DUMMY_MODEL=True DUMMY_MODEL_CONFIG_PATH=tests/special_e2e/ppo_trainer/expert_parallel/qwen2moe_minimal.json \
           PPO_MAX_TOKEN_LEN=512 FWD_MAX_TOKEN_LEN=512 \
           MAX_PROMPT_LENGTH=256 MAX_RESPONSE_LENGTH=256 \
 
@@ -12,7 +12,7 @@
 # - `special_sanity`: a suite of quick sanity tests
 # - `special_standalone`: a set of tests that are designed to run in dedicated environments
 
-# Accelerators for tests 
+# Accelerators for tests
 # - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
 # - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a Linux environment.
 
@@ -79,7 +79,7 @@ permissions:
 jobs:
   sgl:
     runs-on: [L20x8]
-    timeout-minutes: 20 # Increase this timeout value as needed
+    timeout-minutes: 35 # Increase this timeout value as needed
     env:
       HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
       HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
@@ -142,4 +142,4 @@ jobs:
       - name: Test the latest SGLang Rollout async with multimodal delta
         run: |
           cd tests/workers/rollout
-          pytest -s test_sglang_async_rollout_multimodal_delta.py
+          pytest -s test_sglang_async_rollout_multimodal_delta.py
@@ -40,8 +40,10 @@
 from verl.utils.model import get_generation_config, update_model_config
 from verl.utils.profiler import DistProfiler, DistProfilerExtension, ProfilerConfig, log_gpu_memory_usage, simple_timer
 from verl.utils.profiler.performance import reduce_timing, topk_reduce_ratio_min_max
+from verl.workers.config import HFModelConfig, RolloutConfig
 from verl.workers.fsdp_workers import ActorRolloutRefWorker as ARRWorker
 from verl.workers.fsdp_workers import CriticWorker
+from verl.workers.rollout import get_rollout_class
 
 logger = logging.getLogger(__file__)
 logger.setLevel(os.getenv("VERL_LOGGING_LEVEL", "WARN"))
@@ -204,20 +206,12 @@ def init_model(self):
         rollout_name = self.config.rollout.name
         assert rollout_name == "vllm"
 
-        from verl.workers.rollout.vllm_rollout import vLLMRollout
+        rollout_config: RolloutConfig = omega_conf_to_dataclass(self.config.rollout)
+        model_config: HFModelConfig = omega_conf_to_dataclass(self.config.model, dataclass_type=HFModelConfig)
 
         log_gpu_memory_usage(f"Before building {rollout_name} rollout", logger=logger)
-
-        from verl.workers.rollout.vllm_rollout import vLLMAsyncRollout
-
-        vllm_rollout_cls = vLLMRollout if self.config.rollout.mode == "sync" else vLLMAsyncRollout
-        rollout = vllm_rollout_cls(
-            model_path=local_path,
-            config=self.config.rollout,
-            tokenizer=self.tokenizer,
-            model_hf_config=actor_model_config,
-            device_mesh=rollout_device_mesh,
-            trust_remote_code=trust_remote_code,
+        rollout = get_rollout_class(rollout_config.name, rollout_config.mode)(
+            config=rollout_config, model_config=model_config, device_mesh=rollout_device_mesh
         )
         log_gpu_memory_usage(f"After building {rollout_name} rollout", logger=logger)
         from .vllm_sharding_manager import VLLMShardingManager
 
@@ -13,21 +13,24 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import copy
 import logging
 import os
 
 import torch
 import torch.distributed
-from omegaconf import DictConfig, OmegaConf
+from omegaconf import DictConfig, OmegaConf, open_dict
 
 from verl.single_controller.base.decorator import Dispatch, register
+from verl.utils.config import omega_conf_to_dataclass
 from verl.utils.debug import (
     log_gpu_memory_usage,
 )
 from verl.utils.device import get_device_name, get_torch_device
-from verl.utils.fs import copy_to_local
+from verl.workers.config import HFModelConfig, RolloutConfig
 from verl.workers.megatron_workers import ActorRolloutRefWorker as ARRWorker
 from verl.workers.megatron_workers import CriticWorker, RewardModelWorker
+from verl.workers.rollout import get_rollout_class
 
 logger = logging.getLogger(__file__)
 logger.setLevel(os.getenv("VERL_LOGGING_LEVEL", "WARN"))
@@ -145,8 +148,6 @@ def init_model(self):
         assert self.config.rollout.name == "vllm"
         assert self.config.rollout.mode == "sync"
 
-        from verl.workers.rollout.vllm_rollout import vLLMRollout
-
         from .vllm_sharding_manager import VLLMShardingManager
 
         # NOTE(sgm): If the QKV and gate_up projection layers are concatenated together in actor,
@@ -162,17 +163,17 @@ def init_model(self):
         )
         log_gpu_memory_usage("Before building vllm rollout", logger=None)
 
-        local_path = copy_to_local(self.config.model.path, use_shm=self.config.model.get("use_shm", False))
-        from verl.workers.rollout.vllm_rollout import vLLMAsyncRollout
-
-        vllm_rollout_cls = vLLMRollout if self.config.rollout.mode == "sync" else vLLMAsyncRollout
-        rollout = vllm_rollout_cls(
-            model_path=local_path,
-            config=self.config.rollout,
-            tokenizer=self.tokenizer,
-            model_hf_config=self.hf_config,
-            device_mesh=rollout_device_mesh,
-            trust_remote_code=trust_remote_code,
+        rollout_config: RolloutConfig = omega_conf_to_dataclass(self.config.rollout)
+        # NOTE(vermouth1992): self.config.model in megatron differs from that of fsdp in the override_config.
+        # To work around this, we deepcopy self.config.model and make it compatible
+        omega_model_config = copy.deepcopy(self.config.model)
+        with open_dict(omega_model_config):
+            override_config = omega_model_config.override_config.pop("model_config")
+            omega_model_config.override_config = override_config
+
+        model_config: HFModelConfig = omega_conf_to_dataclass(omega_model_config, dataclass_type=HFModelConfig)
+        rollout = get_rollout_class(rollout_config.name, rollout_config.mode)(
+            config=rollout_config, model_config=model_config, device_mesh=rollout_device_mesh
         )
         log_gpu_memory_usage("After building vllm rollout", logger=logger)
 
 
@@ -91,9 +91,7 @@ def init_model(self):
             )
 
         if self._is_rollout:
-            self.rollout, self.rollout_sharding_manager = self._build_rollout(
-                trust_remote_code=self.config.model.get("trust_remote_code", False)
-            )
+            self._build_rollout(trust_remote_code=self.config.model.get("trust_remote_code", False))
 
         if self._is_ref:
             self.ref_module_fsdp = self._build_model_optimizer(
 
@@ -19,11 +19,13 @@
 import torch.distributed
 import torch.distributed as dist
 from omegaconf import OmegaConf
-from transformers import AutoConfig, AutoTokenizer
+from transformers import AutoTokenizer
 
 from verl import DataProto
+from verl.utils.config import omega_conf_to_dataclass
 from verl.utils.distributed import initialize_global_process_group
 from verl.utils.model import compute_position_id_with_mask
+from verl.workers.config import HFModelConfig, RolloutConfig
 from verl.workers.rollout.vllm_rollout.vllm_rollout_spmd import vLLMRollout
 
 
@@ -36,7 +38,7 @@ def test_vllm_rollout_with_yarn_position_embeddings():
     model_path = os.path.expanduser("~/models/OldKingMeister/Qwen2.5-1.5B-Instruct-YaRN")
     config = OmegaConf.create(
         {
-            "model_path": model_path,
+            "name": "vllm",
             "prompt_length": 35000,
             "response_length": 512,
             "dtype": "bfloat16",
@@ -56,26 +58,27 @@ def test_vllm_rollout_with_yarn_position_embeddings():
                 "do_sample": False,
             },
             "tensor_model_parallel_size": 4,
-            "trust_remote_code": True,
             "calculate_log_probs": False,
             "do_sample": False,
             "temperature": 0.0,
             "max_num_batched_tokens": 35000 + 512,
         }
     )
 
-    tokenizer = AutoTokenizer.from_pretrained(config.model_path, trust_remote_code=True, padding_side="left")
+    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True, padding_side="left")
     tokenizer.pad_token = tokenizer.eos_token
-    model_hf_config = AutoConfig.from_pretrained(config.model_path)
 
     # do_sample=False with temperature=0 for deterministic output
     input_dataproto = prepare_input_dataproto(tokenizer, config, validate=True, do_sample=False)
 
+    rollout_config: RolloutConfig = omega_conf_to_dataclass(config, dataclass_type=RolloutConfig)
+    model_config = HFModelConfig(path=model_path)
+    model_config.tokenizer.pad_token = tokenizer.eos_token
+
     vllm_rollout = vLLMRollout(
-        model_path=config.model_path,
-        config=config,
-        tokenizer=tokenizer,
-        model_hf_config=model_hf_config,
+        config=rollout_config,
+        model_config=model_config,
+        device_mesh=None,
     )
     # rollout
     rollout_response = vllm_rollout.generate_sequences(
 
@@ -30,6 +30,8 @@
 from verl.tools.mcp_search_tool import MCPSearchTool
 from verl.tools.schemas import ToolResponse
 from verl.tools.utils.mcp_clients.McpClientManager import MCPClientManager
+from verl.utils.config import omega_conf_to_dataclass
+from verl.workers.config import HFModelConfig, RolloutConfig
 from verl.workers.rollout.schemas import AsyncRolloutRequest, AsyncRolloutRequestStateEnum, Message
 from verl.workers.rollout.sglang_rollout.sglang_rollout import SGLangRollout
 
@@ -115,18 +117,18 @@ def get_search_messages():
 
 
 class TestRolloutWithMCPSearchTools:
+    local_model_path = "Qwen/Qwen2.5-0.5B"
+
     @pytest.fixture
     def qwen_tokenizer(self):
-        local_model_path = "Qwen/Qwen2.5-0.5B"
-        tokenizer = AutoTokenizer.from_pretrained(local_model_path, padding_side="left")
+        tokenizer = AutoTokenizer.from_pretrained(self.local_model_path, padding_side="left")
         tokenizer.pad_token = tokenizer.eos_token
         return tokenizer
 
     # we only need this for tokenizer
     @pytest.fixture
     def qwen_model_config(self):
-        local_model_path = "Qwen/Qwen2.5-0.5B"
-        config = AutoConfig.from_pretrained(local_model_path)
+        config = AutoConfig.from_pretrained(self.local_model_path)
         return config
 
     @pytest.fixture
@@ -269,11 +271,12 @@ def mock_rollout(self, search_rollout_config, qwen_tokenizer, qwen_model_config)
             patch.object(SGLangRollout, "_init_inference_engine", return_value=None),
             patch.object(SGLangRollout, "_init_sampling_params", return_value=None),
         ):
+            rollout_config: RolloutConfig = omega_conf_to_dataclass(search_rollout_config, dataclass_type=RolloutConfig)
+            model_config = HFModelConfig(path=self.local_model_path)
             rollout = SGLangRollout(
-                actor_module="",
-                config=search_rollout_config,
-                processing_class=qwen_tokenizer,
-                model_hf_config=qwen_model_config,
+                config=rollout_config,
+                model_config=model_config,
+                device_mesh=None,
             )
             rollout.sampling_params = {
                 "n": 1,
 
@@ -34,6 +34,8 @@
     ToolResponse,
 )
 from verl.tools.search_tool import SearchTool
+from verl.utils.config import omega_conf_to_dataclass
+from verl.workers.config import HFModelConfig, RolloutConfig
 from verl.workers.rollout.schemas import AsyncRolloutRequest, AsyncRolloutRequestStateEnum, Message
 from verl.workers.rollout.sglang_rollout.sglang_rollout import SGLangRollout
 
@@ -87,18 +89,18 @@ def get_search_messages():
 
 
 class TestRolloutWithSearchTools:
+    local_model_path = "Qwen/Qwen2.5-0.5B"
+
     @pytest.fixture
     def qwen_tokenizer(self):
-        local_model_path = "Qwen/Qwen2.5-0.5B"
-        tokenizer = AutoTokenizer.from_pretrained(local_model_path, padding_side="left")
+        tokenizer = AutoTokenizer.from_pretrained(self.local_model_path, padding_side="left")
         tokenizer.pad_token = tokenizer.eos_token
         return tokenizer
 
     # we only need this for tokenizer
     @pytest.fixture
     def qwen_model_config(self):
-        local_model_path = "Qwen/Qwen2.5-0.5B"
-        config = AutoConfig.from_pretrained(local_model_path)
+        config = AutoConfig.from_pretrained(self.local_model_path)
         return config
 
     @pytest.fixture
@@ -172,11 +174,12 @@ def mock_rollout(self, search_rollout_config, qwen_tokenizer, qwen_model_config)
             patch.object(SGLangRollout, "_init_inference_engine", return_value=None),
             patch.object(SGLangRollout, "_init_sampling_params", return_value=None),
         ):
+            rollout_config: RolloutConfig = omega_conf_to_dataclass(search_rollout_config, dataclass_type=RolloutConfig)
+            model_config = HFModelConfig(path=self.local_model_path)
             rollout = SGLangRollout(
-                actor_module="",
-                config=search_rollout_config,
-                processing_class=qwen_tokenizer,
-                model_hf_config=qwen_model_config,
+                config=rollout_config,
+                model_config=model_config,
+                device_mesh=None,
             )
             rollout.sampling_params = {
                 "n": 1,
@@ -193,11 +196,12 @@ def mock_rollout(self, search_rollout_config, qwen_tokenizer, qwen_model_config)
     def test_tools_registration(
         self, mock_env, mock_engine, mock_sampling, search_rollout_config, qwen_tokenizer, qwen_model_config
     ):
+        rollout_config: RolloutConfig = omega_conf_to_dataclass(search_rollout_config, dataclass_type=RolloutConfig)
+        model_config = HFModelConfig(path=self.local_model_path)
         rollout = SGLangRollout(
-            actor_module="",
-            config=search_rollout_config,
-            processing_class=qwen_tokenizer,
-            model_hf_config=qwen_model_config,
+            config=rollout_config,
+            model_config=model_config,
+            device_mesh=None,
         )
         assert len(rollout._tool_schemas) == 1
         assert "search" in rollout._tool_map.keys()
@@ -220,11 +224,12 @@ def test_rollout_req_creation(
         qwen_model_config,
         search_data_proto,
     ):
+        rollout_config: RolloutConfig = omega_conf_to_dataclass(search_rollout_config, dataclass_type=RolloutConfig)
+        model_config = HFModelConfig(path=self.local_model_path)
         rollout = SGLangRollout(
-            actor_module="",
-            config=search_rollout_config,
-            processing_class=qwen_tokenizer,
-            model_hf_config=qwen_model_config,
+            config=rollout_config,
+            model_config=model_config,
+            device_mesh=None,
         )
         req_list = rollout._preprocess_prompt_to_async_rollout_requests(search_data_proto, n=1)
         assert len(req_list) == 1
 
@@ -41,6 +41,8 @@
 )
 from verl.workers.rollout.schemas import AsyncRolloutRequest, AsyncRolloutRequestStateEnum, Message
 from verl.workers.rollout.sglang_rollout.sglang_rollout import SGLangRollout
+from verl.utils.config import omega_conf_to_dataclass
+from verl.workers.config import HFModelConfig, RolloutConfig
 
 sandbox_url = ""
 
@@ -148,18 +150,18 @@ def wrapper(*args, **kwargs):
 
 
 class TestRolloutWithTools:
+    local_model_path = "Qwen/Qwen2.5-0.5B"
+
     @pytest.fixture
     def qwen_tokenizer(self):
-        local_model_path = "Qwen/Qwen2.5-0.5B"
-        tokenizer = AutoTokenizer.from_pretrained(local_model_path, padding_side="left")
+        tokenizer = AutoTokenizer.from_pretrained(self.local_model_path, padding_side="left")
         tokenizer.pad_token = tokenizer.eos_token
         return tokenizer
 
     # we only need this for tokenizer
     @pytest.fixture
     def qwen_model_config(self):
-        local_model_path = "Qwen/Qwen2.5-0.5B"
-        config = AutoConfig.from_pretrained(local_model_path)
+        config = AutoConfig.from_pretrained(self.local_model_path)
         return config
 
     @pytest.fixture
@@ -227,11 +229,12 @@ def mock_rollout(self, sandbox_fusion_rollout_config, qwen_tokenizer, qwen_model
         with patch.object(SGLangRollout, "_init_distributed_env", return_value=None), patch.object(
             SGLangRollout, "_init_inference_engine", return_value=None
         ), patch.object(SGLangRollout, "_init_sampling_params", return_value=None):
+            rollout_config: RolloutConfig = omega_conf_to_dataclass(sandbox_fusion_rollout_config, dataclass_type=RolloutConfig)
+            model_config = HFModelConfig(path=self.local_model_path)
             rollout = SGLangRollout(
-                actor_module="",
-                config=sandbox_fusion_rollout_config,
-                processing_class=qwen_tokenizer,
-                model_hf_config=qwen_model_config,
+                config=rollout_config,
+                model_config=model_config,
+                device_mesh=None,
             )
             # set default sampling_params
             rollout.sampling_params = {
Original file line number	Diff line number	Diff line change
`@@ -91,9 +91,7 @@ def init_model(self):`
`91`	`91`	`)`
`92`	`92`
`93`	`93`	`if self._is_rollout:`
`94`		`- self.rollout, self.rollout_sharding_manager = self._build_rollout(`
`95`		`- trust_remote_code=self.config.model.get("trust_remote_code", False)`
`96`		`- )`
	`94`	`+ self._build_rollout(trust_remote_code=self.config.model.get("trust_remote_code", False))`
`97`	`95`
`98`	`96`	`if self._is_ref:`
`99`	`97`	`self.ref_module_fsdp = self._build_model_optimizer(`