[rollout] feat: add optional tags parameter to release()

ellen-m1 · claude · ellen-m1 · commit e8946279720d · 2026-03-09T16:27:37.000-07:00
Add tags parameter to release() across all rollout backends (SGLang, vLLM, TRT-LLM), matching the existing resume(tags) signature from verl-project#1911. Callers can now selectively release ["weights"], ["kv_cache"], or both. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/tests/workers/rollout/test_release_tags.py b/tests/workers/rollout/test_release_tags.py
@@ -0,0 +1,287 @@
+# Copyright 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for the `tags` parameter on rollout release().
+
+Each backend's release() accepts an optional `tags` argument that selects
+which GPU resources to release (["weights"], ["kv_cache"], or both).
+
+The shared validation logic lives in `_tag_utils.validate_release_tags()`
+and is tested directly (no mocking needed). Backend-specific behavior
+(vLLM sleep-level mapping, TRT-LLM tag resolution) is tested via local
+copies of each backend's release() logic run against lightweight mock
+objects, without requiring GPU or distributed infrastructure.
+"""
+
+from __future__ import annotations
+
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from verl.workers.rollout._tag_utils import validate_release_tags
+
+# ---------------------------------------------------------------------------
+# validate_release_tags — shared logic (real code, no mocks)
+# ---------------------------------------------------------------------------
+
+
+class TestValidateReleaseTags:
+    """Exercise validate_release_tags() directly: normalization and rejection paths."""
+
+    def test_none_returns_both(self):
+        # None is the default and expands to both tags.
+        resolved = validate_release_tags(None)
+        assert resolved == {"weights", "kv_cache"}
+
+    def test_weights_only(self):
+        resolved = validate_release_tags(["weights"])
+        assert resolved == {"weights"}
+
+    def test_kv_cache_only(self):
+        resolved = validate_release_tags(["kv_cache"])
+        assert resolved == {"kv_cache"}
+
+    def test_both_explicit(self):
+        requested = ["kv_cache", "weights"]
+        assert validate_release_tags(requested) == set(requested)
+
+    def test_duplicates_deduplicated(self):
+        repeated = ["weights"] * 2
+        assert validate_release_tags(repeated) == {"weights"}
+
+    def test_unknown_tag_raises(self):
+        unknown_only = ["bogus"]
+        with pytest.raises(ValueError, match="Unknown release tags"):
+            validate_release_tags(unknown_only)
+
+    def test_mixed_valid_and_unknown_raises(self):
+        # One valid tag does not excuse an unknown one.
+        partly_valid = ["weights", "bogus"]
+        with pytest.raises(ValueError, match="Unknown release tags"):
+            validate_release_tags(partly_valid)
+
+    def test_empty_list_raises(self):
+        no_tags = []
+        with pytest.raises(ValueError, match="must not be empty"):
+            validate_release_tags(no_tags)
+
+
+# ---------------------------------------------------------------------------
+# Backend-specific release() behavior via mock objects.
+#
+# These test hand-copied mirrors of each backend's release() logic without
+# importing the actual classes (which require torch, sglang, ray, etc.);
+# keep the mirrors in sync with the real methods. The validate_release_tags()
+# call is real code — only the async I/O (engine calls, server adapters) is mocked.
+# ---------------------------------------------------------------------------
+
+
+async def _sglang_release(self, tags=None):
+    """Stand-in for SglangRollout.release(): validate tags, then release via the engine.
+
+    Tags are validated before anything else. The server adapter is always
+    initialised; the engine call only happens on infer_tp local rank 0 when
+    ``config.free_cache_engine`` is truthy, and receives the tags sorted.
+    """
+    requested = validate_release_tags(tags)
+    await self._init_server_adapter()
+    on_local_rank_zero = self.device_mesh["infer_tp"].get_local_rank() == 0
+    if not on_local_rank_zero or not self.config.free_cache_engine:
+        return
+    await self._engine.release_memory_occupation(tags=sorted(requested))
+
+
+async def _vllm_release(self, tags=None):
+    """Stand-in for VllmRollout.release(): map validated tags to a vLLM sleep level.
+
+    Both tags -> ``self.sleep_level``; ``kv_cache`` alone -> level 1; ``weights``
+    alone raises NotImplementedError. Nothing is executed when
+    ``config.free_cache_engine`` is falsy, but tags are still validated first.
+    """
+    requested = validate_release_tags(tags)
+    if not self.config.free_cache_engine:
+        return
+    # NOTE(review): vLLM's sleep-mode docs describe level 1 as offloading weights to
+    # CPU as well as dropping the KV cache — confirm that fits a kv_cache-only release.
+    if requested == {"kv_cache"}:
+        level = 1
+    elif requested == {"kv_cache", "weights"}:
+        level = self.sleep_level
+    else:
+        raise NotImplementedError(
+            f"vLLM release does not support tags={tags!r}; only ['kv_cache', 'weights'] or ['kv_cache'] are supported"
+        )
+    await self._execute_method("sleep", kwargs={"level": level})
+
+
+# Fine-grained TRT-LLM tags that the coarse "weights" release tag expands to in
+# _trtllm_release(). Said to be copied from ServerAdapter._WEIGHTS_TAGS —
+# NOTE(review): that class is not visible here; confirm the two lists stay in sync.
+_TRTLLM_WEIGHTS_TAGS = [
+    "sampler",
+    "drafter",
+    "guided_decoder",
+    "spec_resource_manager",
+    "model_extra",
+    "executor_extra",
+    "model",
+    "draft_model",
+]
+
+
+async def _trtllm_release(self, tags=None):
+    """Stand-in for TrtllmRollout.release(): expand coarse tags into TRT-LLM tags.
+
+    Tags are validated first. Non-leader ranks, or a falsy
+    ``config.free_cache_engine``, return before the adapter is initialised.
+    Otherwise "weights" expands to ``_TRTLLM_WEIGHTS_TAGS`` and "kv_cache" is
+    passed through, with the weight tags placed first.
+    """
+    requested = validate_release_tags(tags)
+    if not (self.is_leader_rank and self.config.free_cache_engine):
+        return
+    await self._init_server_adapter()
+    weight_part = _TRTLLM_WEIGHTS_TAGS if "weights" in requested else []
+    cache_part = ["kv_cache"] if "kv_cache" in requested else []
+    await self._adapter.release_memory_occupation(tags=[*weight_part, *cache_part])
+
+
+# ---------------------------------------------------------------------------
+# Mock factories
+# ---------------------------------------------------------------------------
+
+
+def _make_sglang_mock():
+    """Build a mock carrying the attributes _sglang_release() reads (rank 0, freeing enabled)."""
+    engine = AsyncMock()
+    engine.release_memory_occupation = AsyncMock(return_value={"status": "ok"})
+    infer_tp = MagicMock(get_local_rank=MagicMock(return_value=0))
+
+    rollout = MagicMock()
+    rollout._init_server_adapter = AsyncMock()
+    rollout._engine = engine
+    rollout.device_mesh = {"infer_tp": infer_tp}
+    rollout.config = MagicMock(free_cache_engine=True)
+    return rollout
+
+
+def _make_vllm_mock():
+    """Build a mock carrying the attributes _vllm_release() reads (sleep level 2, freeing enabled)."""
+    rollout = MagicMock()
+    rollout._execute_method = AsyncMock()
+    rollout.sleep_level = 2
+    rollout.config = MagicMock(free_cache_engine=True)
+    return rollout
+
+
+def _make_trtllm_mock():
+    """Build a mock carrying the attributes _trtllm_release() reads (leader rank, freeing enabled)."""
+    adapter = AsyncMock()
+    adapter.release_memory_occupation = AsyncMock(return_value={"status": "ok"})
+
+    rollout = MagicMock()
+    rollout.is_leader_rank = True
+    rollout.config = MagicMock(free_cache_engine=True)
+    rollout._init_server_adapter = AsyncMock()
+    rollout._adapter = adapter
+    return rollout
+
+
+# ---------------------------------------------------------------------------
+# SGLang tests
+# ---------------------------------------------------------------------------
+
+
+class TestSglangReleaseTags:
+    """Drive _sglang_release() against a mock and inspect the engine call."""
+
+    @pytest.mark.asyncio
+    async def test_default_releases_both(self):
+        rollout = _make_sglang_mock()
+        await _sglang_release(rollout)
+        engine_release = rollout._engine.release_memory_occupation
+        # Tags reach the engine in sorted order.
+        engine_release.assert_called_once_with(tags=["kv_cache", "weights"])
+
+    @pytest.mark.asyncio
+    async def test_weights_only(self):
+        rollout = _make_sglang_mock()
+        await _sglang_release(rollout, tags=["weights"])
+        engine_release = rollout._engine.release_memory_occupation
+        engine_release.assert_called_once_with(tags=["weights"])
+
+    @pytest.mark.asyncio
+    async def test_kv_cache_only(self):
+        rollout = _make_sglang_mock()
+        await _sglang_release(rollout, tags=["kv_cache"])
+        engine_release = rollout._engine.release_memory_occupation
+        engine_release.assert_called_once_with(tags=["kv_cache"])
+
+    @pytest.mark.asyncio
+    async def test_unknown_tag_raises(self):
+        rollout = _make_sglang_mock()
+        bad_tags = ["bogus"]
+        with pytest.raises(ValueError, match="Unknown release tags"):
+            await _sglang_release(rollout, tags=bad_tags)
+
+    @pytest.mark.asyncio
+    async def test_free_cache_disabled_is_noop(self):
+        rollout = _make_sglang_mock()
+        rollout.config.free_cache_engine = False
+        await _sglang_release(rollout)
+        # With freeing disabled the engine must never be asked to release.
+        rollout._engine.release_memory_occupation.assert_not_called()
+
+
+# ---------------------------------------------------------------------------
+# vLLM tests
+# ---------------------------------------------------------------------------
+
+
+class TestVllmReleaseTags:
+    """Drive _vllm_release() against a mock and inspect the "sleep" call."""
+
+    @pytest.mark.asyncio
+    async def test_default_releases_both(self):
+        rollout = _make_vllm_mock()
+        await _vllm_release(rollout)
+        # Both tags use the mock's configured sleep_level (2).
+        expected_kwargs = {"level": 2}
+        rollout._execute_method.assert_called_once_with("sleep", kwargs=expected_kwargs)
+
+    @pytest.mark.asyncio
+    async def test_kv_cache_only(self):
+        rollout = _make_vllm_mock()
+        await _vllm_release(rollout, tags=["kv_cache"])
+        expected_kwargs = {"level": 1}
+        rollout._execute_method.assert_called_once_with("sleep", kwargs=expected_kwargs)
+
+    @pytest.mark.asyncio
+    async def test_weights_only_not_supported(self):
+        rollout = _make_vllm_mock()
+        weights_only = ["weights"]
+        with pytest.raises(NotImplementedError):
+            await _vllm_release(rollout, tags=weights_only)
+
+    @pytest.mark.asyncio
+    async def test_unknown_tag_raises_value_error(self):
+        rollout = _make_vllm_mock()
+        bad_tags = ["bogus"]
+        with pytest.raises(ValueError, match="Unknown release tags"):
+            await _vllm_release(rollout, tags=bad_tags)
+
+    @pytest.mark.asyncio
+    async def test_free_cache_disabled_is_noop(self):
+        rollout = _make_vllm_mock()
+        rollout.config.free_cache_engine = False
+        # Default tags are valid, so the only effect of the flag is skipping the call.
+        await _vllm_release(rollout)
+        rollout._execute_method.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_free_cache_disabled_still_validates(self):
+        rollout = _make_vllm_mock()
+        rollout.config.free_cache_engine = False
+        bad_tags = ["bogus"]
+        # Validation runs before the free_cache_engine check.
+        with pytest.raises(ValueError, match="Unknown release tags"):
+            await _vllm_release(rollout, tags=bad_tags)
+
+
+# ---------------------------------------------------------------------------
+# TRT-LLM tests
+# ---------------------------------------------------------------------------
+
+
+class TestTrtllmReleaseTags:
+    """Drive _trtllm_release() against a mock and inspect the tags sent to the adapter."""
+
+    @pytest.mark.asyncio
+    async def test_default_releases_both(self):
+        rollout = _make_trtllm_mock()
+        await _trtllm_release(rollout)
+        sent_tags = rollout._adapter.release_memory_occupation.call_args.kwargs["tags"]
+        assert "kv_cache" in sent_tags
+        assert all(weight_tag in sent_tags for weight_tag in _TRTLLM_WEIGHTS_TAGS)
+
+    @pytest.mark.asyncio
+    async def test_weights_only(self):
+        rollout = _make_trtllm_mock()
+        await _trtllm_release(rollout, tags=["weights"])
+        sent_tags = rollout._adapter.release_memory_occupation.call_args.kwargs["tags"]
+        assert "kv_cache" not in sent_tags
+        assert all(weight_tag in sent_tags for weight_tag in _TRTLLM_WEIGHTS_TAGS)
+
+    @pytest.mark.asyncio
+    async def test_kv_cache_only(self):
+        rollout = _make_trtllm_mock()
+        await _trtllm_release(rollout, tags=["kv_cache"])
+        sent_tags = rollout._adapter.release_memory_occupation.call_args.kwargs["tags"]
+        assert sent_tags == ["kv_cache"]
+
+    @pytest.mark.asyncio
+    async def test_unknown_tag_raises(self):
+        rollout = _make_trtllm_mock()
+        bad_tags = ["bogus"]
+        with pytest.raises(ValueError, match="Unknown release tags"):
+            await _trtllm_release(rollout, tags=bad_tags)
+
+    @pytest.mark.asyncio
+    async def test_non_leader_is_noop(self):
+        rollout = _make_trtllm_mock()
+        rollout.is_leader_rank = False
+        await _trtllm_release(rollout, tags=["weights"])
+        # Non-leader ranks return before touching the adapter.
+        rollout._adapter.release_memory_occupation.assert_not_called()
diff --git a/verl/workers/rollout/_tag_utils.py b/verl/workers/rollout/_tag_utils.py
@@ -0,0 +1,46 @@
+# Copyright 2025 Bytedance Ltd. and/or its affiliates
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Lightweight tag validation utilities for rollout release/resume.
+
+This module has zero heavy dependencies (no torch, ray, etc.) so it can
+be imported in unit tests without GPU or distributed infrastructure.
+"""
+
+from __future__ import annotations
+
+_VALID_RELEASE_TAGS = frozenset({"kv_cache", "weights"})
+_DEFAULT_RELEASE_TAGS = ("kv_cache", "weights")
+
+
+def validate_release_tags(tags: list[str] | None) -> set[str]:
+    """Normalize and validate release tags.
+
+    Args:
+        tags: List of tags to release, or None for the default (both).
+
+    Returns:
+        A new set of validated tags (duplicates collapsed).
+
+    Raises:
+        ValueError: If ``tags`` is a bare string, is empty, or contains any
+            tag not in {"kv_cache", "weights"}.
+    """
+    if tags is None:
+        return set(_DEFAULT_RELEASE_TAGS)
+    if isinstance(tags, str):
+        # set("weights") would split the string into characters and report each
+        # one as an unknown tag; reject the bare string with a direct message.
+        raise ValueError(f"release tags must be a list of strings, got the bare string {tags!r}")
+    tag_set = set(tags)
+    if not tag_set:
+        raise ValueError("release tags must not be empty; pass None to release all")
+    unknown = tag_set - _VALID_RELEASE_TAGS
+    if unknown:
+        # Sort (by repr, so mixed types cannot break the sort) to keep the
+        # message stable across runs; raw set ordering varies with hashing.
+        unknown_sorted = sorted(unknown, key=repr)
+        raise ValueError(f"Unknown release tags: {unknown_sorted!r}; expected subset of {sorted(_VALID_RELEASE_TAGS)}")
+    return tag_set
diff --git a/verl/workers/rollout/base.py b/verl/workers/rollout/base.py
@@ -64,8 +64,13 @@ async def update_weights(
         pass
 
     @abstractmethod
-    async def release(self):
-        """Release weights and kv cache in GPU memory."""
+    async def release(self, tags: list[str] | None = None):
+        """Release weights and/or kv cache in GPU memory.
+
+        Args:
+            tags: List of tags to release, e.g. ["weights"], ["kv_cache"], or
+                  ["kv_cache", "weights"]. If None (default), releases both.
+        """
         pass
 
     def generate_sequences(self, prompts: DataProto) -> DataProto:
diff --git a/verl/workers/rollout/sglang_rollout/sglang_rollout.py b/verl/workers/rollout/sglang_rollout/sglang_rollout.py
@@ -174,11 +174,19 @@ async def resume(self, tags: list[str]):
         if self.device_mesh["infer_tp"].get_local_rank() == 0 and self.config.free_cache_engine:
             await self._engine.resume_memory_occupation(tags=tags)
 
-    async def release(self):
-        """Release weights and kv cache in GPU memory."""
+    async def release(self, tags: list[str] | None = None):
+        """Release weights and/or kv cache in GPU memory.
+
+        Args:
+            tags: List of tags to release, e.g. ["weights"], ["kv_cache"], or
+                  ["kv_cache", "weights"]. If None (default), releases both.
+        """
+        from verl.workers.rollout._tag_utils import validate_release_tags
+
+        tag_set = validate_release_tags(tags)
         await self._init_server_adapter()
         if self.device_mesh["infer_tp"].get_local_rank() == 0 and self.config.free_cache_engine:
-            await self._engine.release_memory_occupation(tags=["kv_cache", "weights"])
+            await self._engine.release_memory_occupation(tags=sorted(tag_set))
 
     async def update_weights(
         self, weights: Generator[tuple[str, torch.Tensor], None, None], global_steps: int = None, **kwargs
diff --git a/verl/workers/rollout/trtllm_rollout/trtllm_rollout.py b/verl/workers/rollout/trtllm_rollout/trtllm_rollout.py
diff --git a/verl/workers/rollout/vllm_rollout/vllm_rollout.py b/verl/workers/rollout/vllm_rollout/vllm_rollout.py