
Commit 4205078

22dimensions authored and nsdie committed
Upgrade to 0.11.1 newest vllm commit (vllm-project#3982)
### What this PR does / why we need it?

Adapt the vllm-ascend main branch to vLLM releases/v0.11.1:

- Fix `forward context not set` in test_vlm.py, caused by vllm-project/vllm#23207.
- Fix failing imports of `cdiv` and `round_down`, caused by vllm-project/vllm#27188.
- Fix failing import of `init_cached_hf_modules`, caused by vllm-project/vllm#27567.
- Adapt the Triton kernel `fused_recurrent_gated_delta_rule_fwd_kernel`, caused by vllm-project/vllm#27654.
- Remove unused code in sigmoid_gating.py: `class FusedRecurrentFunction`, `fused_recurrent_gated_delta_rule`, and `fused_recurrent_gated_delta_rule_fwd`.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

CI.

- vLLM version: v0.11.0
- vLLM main: vllm-project/vllm@83f478b

Signed-off-by: 22dimensions <[email protected]>
Signed-off-by: nsdie <[email protected]>
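The import fixes above all follow the same version-gating pattern: check the installed vLLM version once, then import each helper from its old or new location. A minimal standalone sketch of that pattern — `vllm_version_is` is stubbed here (the real helper lives in `vllm_ascend.utils`), and a fallback definition lets the sketch run without vLLM installed:

```python
# Standalone sketch of the version-gated import pattern used in this PR.
INSTALLED_VLLM = "0.11.1"  # pretend version, for the sketch only


def vllm_version_is(target: str) -> bool:
    # Stub of vllm_ascend.utils.vllm_version_is.
    return INSTALLED_VLLM == target


try:
    if vllm_version_is("0.11.0"):
        from vllm.utils import cdiv  # old location (pre-#27188)
    else:
        from vllm.utils.math_utils import cdiv  # new location
except ImportError:
    # Fallback so the sketch runs without vLLM: cdiv is ceiling division.
    def cdiv(a: int, b: int) -> int:
        return -(a // -b)


print(cdiv(7, 2))  # 4
```

Gating on the version string (rather than a bare `try`/`except ImportError`) keeps the intent explicit: each branch names exactly which vLLM release it targets.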
1 parent 36f9e42 commit 4205078

File tree

21 files changed (+258, -242 lines)


.github/workflows/format_pr_body.yaml

Lines changed: 1 addition & 1 deletion
```diff
@@ -36,7 +36,7 @@ jobs:
       - name: Get vLLM version
         run: |
-          VLLM_COMMIT=83f478bb19489b41e9d208b47b4bb5a95ac171ac
+          VLLM_COMMIT=2918c1b49c88c29783c86f78d2c4221cb9622379
           echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV

       - name: Checkout repository
```

.github/workflows/vllm_ascend_test.yaml

Lines changed: 3 additions & 3 deletions
```diff
@@ -42,7 +42,7 @@ jobs:
   lint:
     uses: ./.github/workflows/pre-commit.yml
     with:
-      vllm: 83f478bb19489b41e9d208b47b4bb5a95ac171ac
+      vllm: 2918c1b49c88c29783c86f78d2c4221cb9622379
   changes:
     runs-on: ubuntu-latest
     outputs:
@@ -83,7 +83,7 @@ jobs:
       VLLM_USE_MODELSCOPE: True
     strategy:
       matrix:
-        vllm_version: [83f478bb19489b41e9d208b47b4bb5a95ac171ac, v0.11.0]
+        vllm_version: [2918c1b49c88c29783c86f78d2c4221cb9622379, v0.11.0]
     steps:
       - name: Install packages
         run: |
@@ -138,7 +138,7 @@ jobs:
     name: e2e-light
     strategy:
       matrix:
-        vllm_version: [83f478bb19489b41e9d208b47b4bb5a95ac171ac, v0.11.0]
+        vllm_version: [2918c1b49c88c29783c86f78d2c4221cb9622379, v0.11.0]
     # Note (yikun): If CI resource are limited we can split job into two chain jobs
     needs: [lint, changes]
     # only trigger e2e test after lint passed and the change is e2e related with pull request.
```

.github/workflows/vllm_ascend_test_full.yaml

Lines changed: 1 addition & 1 deletion
```diff
@@ -69,7 +69,7 @@ jobs:
     name: e2e-full
     strategy:
       matrix:
-        vllm_version: [83f478bb19489b41e9d208b47b4bb5a95ac171ac, v0.11.0]
+        vllm_version: [2918c1b49c88c29783c86f78d2c4221cb9622379, v0.11.0]
     needs: [changes]
     if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
     uses: ./.github/workflows/_e2e_test.yaml
```

docs/source/community/versioning_policy.md

Lines changed: 1 addition & 1 deletion
```diff
@@ -42,7 +42,7 @@ The table below is the release compatibility matrix for vLLM Ascend release.
 For main branch of vLLM Ascend, we usually make it compatible with the latest vLLM release and a newer commit hash of vLLM. Please note that this table is usually updated. Please check it regularly.

 | vLLM Ascend | vLLM | Python | Stable CANN | PyTorch/torch_npu |
 |-------------|--------------|------------------|-------------|--------------------|
-| main | v0.11.0/83f478bb19489b41e9d208b47b4bb5a95ac171ac | >= 3.10, < 3.12 | 8.3.RC1 | 2.7.1 / 2.7.1 |
+| main | v0.11.0/2918c1b49c88c29783c86f78d2c4221cb9622379 | >= 3.10, < 3.12 | 8.3.RC1 | 2.7.1 / 2.7.1 |

 ## Release cadence
```

tests/ut/worker/test_worker_v1.py

Lines changed: 6 additions & 3 deletions
```diff
@@ -8,6 +8,9 @@
 from tests.ut.base import TestBase
 from vllm_ascend.utils import vllm_version_is

+init_cached_hf_modules_path = "vllm.utils.init_cached_hf_modules" if vllm_version_is(
+    "0.11.0") else "vllm.utils.import_utils.init_cached_hf_modules"
+

 class TestNPUWorker(TestBase):

@@ -53,7 +56,7 @@ def setUp(self):
     @patch("vllm_ascend.worker.worker_v1.init_ascend_config")
     @patch("vllm_ascend.worker.worker_v1.init_ascend_soc_version")
     @patch("vllm_ascend.worker.worker_v1.try_register_lib")
-    @patch("vllm.utils.init_cached_hf_modules")
+    @patch(init_cached_hf_modules_path)
     @patch("vllm_ascend.worker.worker_v1.NPUWorker._init_profiler")
     def test_init_npu_worker_normal_case(
         self,
@@ -115,7 +118,7 @@ def test_init_npu_worker_normal_case(
     @patch("vllm_ascend.worker.worker_v1.init_ascend_config")
     @patch("vllm_ascend.worker.worker_v1.init_ascend_soc_version")
     @patch("vllm_ascend.worker.worker_v1.try_register_lib")
-    @patch("vllm.utils.init_cached_hf_modules")
+    @patch(init_cached_hf_modules_path)
     @patch("vllm_ascend.worker.worker_v1.NPUWorker._init_profiler")
     def test_init_npu_worker_with_trust_remote_code(
         self,
@@ -160,7 +163,7 @@ def test_init_npu_worker_with_trust_remote_code(
     @patch("vllm_ascend.worker.worker_v1.init_ascend_config")
     @patch("vllm_ascend.worker.worker_v1.init_ascend_soc_version")
     @patch("vllm_ascend.worker.worker_v1.try_register_lib")
-    @patch("vllm.utils.init_cached_hf_modules")
+    @patch(init_cached_hf_modules_path)
     @patch("vllm_ascend.worker.worker_v1.NPUWorker._init_profiler")
     def test_init_npu_worker_with_custom_cache_dtype(
         self,
```
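The test change above works because `@patch` takes its target as a plain dotted string, so that string can be computed once at module import time and reused across decorators. A hypothetical sketch of the same idea — the helper name is illustrative, and `os.getcwd` stands in for the real target so the example runs without vLLM:

```python
from unittest.mock import patch


def resolve_patch_target(installed: str) -> str:
    """Pick the dotted path to patch based on the installed vLLM version.

    Mirrors the module-level selection in the test file above; the
    version string handling here is illustrative only.
    """
    if installed == "0.11.0":
        return "vllm.utils.init_cached_hf_modules"
    return "vllm.utils.import_utils.init_cached_hf_modules"


# @patch accepts any computed string target; os.getcwd is used here
# purely so the sketch is runnable without vLLM installed.
target = "os.getcwd"
with patch(target, return_value="/tmp"):
    import os
    print(os.getcwd())  # /tmp
```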

vllm_ascend/attention/attention_v1.py

Lines changed: 8 additions & 1 deletion
```diff
@@ -31,7 +31,14 @@
     get_decode_context_model_parallel_rank,
     get_decode_context_model_parallel_world_size)
 from vllm.forward_context import ForwardContext, get_forward_context
-from vllm.utils import cdiv
+
+from vllm_ascend.utils import vllm_version_is
+
+if vllm_version_is("0.11.0"):
+    from vllm.utils import cdiv
+else:
+    from vllm.utils.math_utils import cdiv
+
 from vllm.v1.attention.backends.utils import AttentionCGSupport
 from vllm.v1.core.sched.output import SchedulerOutput
 from vllm.v1.kv_cache_interface import AttentionSpec
```

vllm_ascend/attention/mla_v1.py

Lines changed: 8 additions & 1 deletion
```diff
@@ -22,7 +22,14 @@
 from vllm.logger import logger
 from vllm.model_executor.layers.linear import (LinearBase,
                                                UnquantizedLinearMethod)
-from vllm.utils import cdiv, round_down
+
+from vllm_ascend.utils import vllm_version_is
+
+if vllm_version_is("0.11.0"):
+    from vllm.utils import cdiv, round_down
+else:
+    from vllm.utils.math_utils import cdiv, round_down
+
 from vllm.v1.attention.backends.utils import AttentionCGSupport

 from vllm_ascend import envs
```
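For reference, the two math helpers whose import location moved are small integer utilities; assumed-equivalent definitions (not copied from vLLM's source) look like this:

```python
def cdiv(a: int, b: int) -> int:
    """Ceiling division: smallest integer >= a / b, for positive b."""
    return -(a // -b)


def round_down(x: int, multiple: int) -> int:
    """Round x down to the nearest multiple of `multiple`."""
    return (x // multiple) * multiple


# e.g. blocks needed for a 10-token request at block size 3, and the
# largest block-aligned length not exceeding 10:
print(cdiv(10, 3))        # 4
print(round_down(10, 3))  # 9
```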

vllm_ascend/core/scheduler.py

Lines changed: 8 additions & 1 deletion
```diff
@@ -22,7 +22,14 @@
 from vllm.distributed.kv_events import KVEventBatch
 from vllm.logger import logger
 from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
-from vllm.utils import cdiv
+
+from vllm_ascend.utils import vllm_version_is
+
+if vllm_version_is("0.11.0"):
+    from vllm.utils import cdiv
+else:
+    from vllm.utils.math_utils import cdiv
+
 from vllm.v1.core.kv_cache_manager import KVCacheBlocks
 from vllm.v1.core.sched.output import NewRequestData, SchedulerOutput
 from vllm.v1.core.sched.scheduler import Scheduler
```

vllm_ascend/distributed/mooncake/config_data.py

Lines changed: 9 additions & 1 deletion
```diff
@@ -9,7 +9,15 @@
 import torch
 from vllm.distributed.kv_transfer.kv_connector.v1.base import \
     KVConnectorMetadata
-from vllm.utils import cdiv, logger
+from vllm.utils import logger
+
+from vllm_ascend.utils import vllm_version_is
+
+if vllm_version_is("0.11.0"):
+    from vllm.utils import cdiv
+else:
+    from vllm.utils.math_utils import cdiv
+
 from vllm.v1.core.sched.output import NewRequestData

 DEFAULT_GLOBAL_SEGMENT_SIZE = 3355443200  # 3.125 GiB
```

vllm_ascend/models/qwen2_5_vl.py

Lines changed: 13 additions & 2 deletions
```diff
@@ -42,6 +42,7 @@
 from vllm.model_executor.models.utils import maybe_prefix
 from vllm.multimodal import MULTIMODAL_REGISTRY

+from vllm_ascend.ascend_forward_context import set_ascend_forward_context
 from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_ND, is_enable_nz,
                                vllm_version_is)

@@ -536,7 +537,11 @@ def _process_image_input(self, image_input) -> tuple[torch.Tensor, ...]:
             image_embeds = image_input["image_embeds"].type(self.visual.dtype)
         else:
             pixel_values = image_input["pixel_values"].type(self.visual.dtype)
-            image_embeds = self.visual(pixel_values, grid_thw=grid_thw)
+            if vllm_version_is("0.11.0"):
+                image_embeds = self.visual(pixel_values, grid_thw=grid_thw)
+            else:
+                with set_ascend_forward_context(None, self.vllm_config):
+                    image_embeds = self.visual(pixel_values, grid_thw=grid_thw)

         # Split concatenated embeddings for each image item.
         merge_size = self.visual.spatial_merge_size
@@ -553,7 +558,13 @@ def _process_video_input(self, video_input) -> tuple[torch.Tensor, ...]:
         else:
             pixel_values_videos = video_input["pixel_values_videos"].type(
                 self.visual.dtype)
-            video_embeds = self.visual(pixel_values_videos, grid_thw=grid_thw)
+            if vllm_version_is("0.11.0"):
+                video_embeds = self.visual(pixel_values_videos,
+                                           grid_thw=grid_thw)
+            else:
+                with set_ascend_forward_context(None, self.vllm_config):
+                    video_embeds = self.visual(pixel_values_videos,
+                                               grid_thw=grid_thw)

         # Split concatenated embeddings for each video item.
         merge_size = self.visual.spatial_merge_size
```
