Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/_e2e_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ jobs:
run: |
# Running the aclgraph tests together in one batch causes an AclmdlRICaptureBegin error,
# so each test file is run separately.
set +e

pytest -sv tests/e2e/singlecard/test_completion_with_prompt_embeds.py
pytest -sv tests/e2e/singlecard/test_aclgraph.py
Expand Down Expand Up @@ -167,6 +168,7 @@ jobs:
VLLM_USE_MODELSCOPE: True
if: ${{ inputs.type == 'light' }}
run: |
set +e
pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py::test_e2e_qwen3_moe_with_torchair
pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py::test_e2e_deepseekv2lite_with_torchair
Expand All @@ -179,6 +181,7 @@ jobs:
VLLM_USE_MODELSCOPE: True
if: ${{ inputs.type == 'full' }}
run: |
set +e
pytest -sv tests/e2e/multicard/test_quantization.py
pytest -sv tests/e2e/multicard/test_aclgraph_capture_replay.py
pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
Expand Down Expand Up @@ -266,6 +269,7 @@ jobs:
VLLM_WORKER_MULTIPROC_METHOD: spawn
VLLM_USE_MODELSCOPE: True
run: |
set +e
pytest -sv \
tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe \
tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
Expand Down
60 changes: 30 additions & 30 deletions .github/workflows/vllm_ascend_test_pr_full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,41 +37,41 @@ concurrency:
cancel-in-progress: true

jobs:
changes:
runs-on: ubuntu-latest
if: ${{ contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') }}
outputs:
e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
steps:
- uses: actions/checkout@v6
- uses: dorny/paths-filter@v3
id: filter
with:
filters: |
e2e_tracker:
- '.github/workflows/vllm_ascend_test.yaml'
- '.github/workflows/_e2e_test.yaml'
- 'vllm_ascend/**'
- 'csrc/**'
- 'cmake/**'
- 'tests/e2e/**'
- 'CMakeLists.txt'
- 'setup.py'
- 'requirements.txt'
- 'requirements-dev.txt'
- 'requirements-lint.txt'
- 'packages.txt'
ut_tracker:
- 'tests/ut/**'
# changes:
# runs-on: ubuntu-latest
# if: ${{ contains(github.event.pull_request.labels.*.name, 'ready') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') }}
# outputs:
# e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
# ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
# steps:
# - uses: actions/checkout@v6
# - uses: dorny/paths-filter@v3
# id: filter
# with:
# filters: |
# e2e_tracker:
# - '.github/workflows/vllm_ascend_test_pr*'
# - '.github/workflows/_e2e_test.yaml'
# - 'vllm_ascend/**'
# - 'csrc/**'
# - 'cmake/**'
# - 'tests/e2e/**'
# - 'CMakeLists.txt'
# - 'setup.py'
# - 'requirements.txt'
# - 'requirements-dev.txt'
# - 'requirements-lint.txt'
# - 'packages.txt'
# ut_tracker:
# - 'tests/ut/**'

e2e-test:
name: e2e-full
strategy:
matrix:
vllm_version: [v0.11.2]
needs: [changes]
if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
vllm_version: [f72a817bdf6bd04b223a9da3af6c4ad1a676a98e]
# needs: [changes]
# if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
uses: ./.github/workflows/_e2e_test.yaml
with:
vllm: ${{ matrix.vllm_version }}
Expand Down
188 changes: 94 additions & 94 deletions .github/workflows/vllm_ascend_test_pr_light.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,114 +39,114 @@ concurrency:
cancel-in-progress: true

jobs:
lint:
uses: ./.github/workflows/pre-commit.yml
with:
vllm: v0.11.2
changes:
runs-on: ubuntu-latest
outputs:
e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
steps:
- uses: actions/checkout@v6
- uses: dorny/paths-filter@v3
id: filter
with:
filters: |
e2e_tracker:
- '.github/workflows/vllm_ascend_test.yaml'
- 'vllm_ascend/**'
- 'csrc/**'
- 'cmake/**'
- 'tests/e2e/**'
- 'CMakeLists.txt'
- 'setup.py'
- 'requirements.txt'
- 'requirements-dev.txt'
- 'requirements-lint.txt'
- 'packages.txt'
ut_tracker:
- 'tests/ut/**'
# lint:
# uses: ./.github/workflows/pre-commit.yml
# with:
# vllm: v0.11.2
# changes:
# runs-on: ubuntu-latest
# outputs:
# e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
# ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
# steps:
# - uses: actions/checkout@v6
# - uses: dorny/paths-filter@v3
# id: filter
# with:
# filters: |
# e2e_tracker:
# - '.github/workflows/vllm_ascend_test_pr*'
# - 'vllm_ascend/**'
# - 'csrc/**'
# - 'cmake/**'
# - 'tests/e2e/**'
# - 'CMakeLists.txt'
# - 'setup.py'
# - 'requirements.txt'
# - 'requirements-dev.txt'
# - 'requirements-lint.txt'
# - 'packages.txt'
# ut_tracker:
# - 'tests/ut/**'

ut:
needs: [lint, changes]
name: unit test
# Only trigger the unit test after lint has passed and the change is e2e- or ut-related.
if: ${{ needs.lint.result == 'success' && (needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.ut_tracker == 'true') }}
runs-on: ubuntu-latest
container:
# FIXME: vllm-ascend installation failed with the CANN 8.3.rc2 image on GitHub Actions
image: quay.io/ascend/cann:8.2.rc2-910b-ubuntu22.04-py3.11
env:
VLLM_LOGGING_LEVEL: ERROR
VLLM_USE_MODELSCOPE: True
SOC_VERSION: ascend910b1
strategy:
matrix:
vllm_version: [v0.11.2]
steps:
- name: Install packages
run: |
apt-get update -y
apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev curl gnupg2
# ut:
# needs: [lint, changes]
# name: unit test
# # Only trigger the unit test after lint has passed and the change is e2e- or ut-related.
# if: ${{ needs.lint.result == 'success' && (needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.ut_tracker == 'true') }}
# runs-on: ubuntu-latest
# container:
# # fixme: vllm-ascend install failed with 8.3.rc2 on github action
# image: quay.io/ascend/cann:8.2.rc2-910b-ubuntu22.04-py3.11
# env:
# VLLM_LOGGING_LEVEL: ERROR
# VLLM_USE_MODELSCOPE: True
# SOC_VERSION: ascend910b1
# strategy:
# matrix:
# vllm_version: [f72a817bdf6bd04b223a9da3af6c4ad1a676a98e, v0.11.2]
# steps:
# - name: Install packages
# run: |
# apt-get update -y
# apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev curl gnupg2

- name: Checkout vllm-project/vllm repo
uses: actions/checkout@v6
with:
repository: vllm-project/vllm
ref: ${{ matrix.vllm_version }}
path: ./vllm-empty
# - name: Checkout vllm-project/vllm repo
# uses: actions/checkout@v6
# with:
# repository: vllm-project/vllm
# ref: ${{ matrix.vllm_version }}
# path: ./vllm-empty

- name: Install vllm-project/vllm from source
working-directory: ./vllm-empty
run: |
VLLM_TARGET_DEVICE=empty python3 -m pip install . --extra-index https://download.pytorch.org/whl/cpu/
python3 -m pip uninstall -y triton
# - name: Install vllm-project/vllm from source
# working-directory: ./vllm-empty
# run: |
# VLLM_TARGET_DEVICE=empty python3 -m pip install . --extra-index https://download.pytorch.org/whl/cpu/
# python3 -m pip uninstall -y triton

- name: Checkout vllm-project/vllm-ascend repo
uses: actions/checkout@v6
# - name: Checkout vllm-project/vllm-ascend repo
# uses: actions/checkout@v6

- name: Install vllm-project/vllm-ascend
run: |
export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
python3 -m pip install -r requirements-dev.txt --extra-index https://download.pytorch.org/whl/cpu/
python3 -m pip install -v . --extra-index https://download.pytorch.org/whl/cpu/
# - name: Install vllm-project/vllm-ascend
# run: |
# export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi
# export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
# python3 -m pip install -r requirements-dev.txt --extra-index https://download.pytorch.org/whl/cpu/
# python3 -m pip install -v . --extra-index https://download.pytorch.org/whl/cpu/

- name: Run unit test
env:
VLLM_WORKER_MULTIPROC_METHOD: spawn
TORCH_DEVICE_BACKEND_AUTOLOAD: 0
run: |
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
pytest -sv --cov --cov-report=xml:unittests-coverage.xml tests/ut \
--ignore tests/ut/torchair/models/test_torchair_deepseek_mtp.py \
--ignore tests/ut/torchair/models/test_torchair_deepseek_v2.py \
--ignore tests/ut/models/test_qwen2_vl.py \
--ignore tests/ut/models/test_qwen2_5_vl.py \
--ignore tests/ut/models/test_qwen2_5_vl_without_padding.py
# - name: Run unit test
# env:
# VLLM_WORKER_MULTIPROC_METHOD: spawn
# TORCH_DEVICE_BACKEND_AUTOLOAD: 0
# run: |
# export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
# pytest -sv --cov --cov-report=xml:unittests-coverage.xml tests/ut \
# --ignore tests/ut/torchair/models/test_torchair_deepseek_mtp.py \
# --ignore tests/ut/torchair/models/test_torchair_deepseek_v2.py \
# --ignore tests/ut/models/test_qwen2_vl.py \
# --ignore tests/ut/models/test_qwen2_5_vl.py \
# --ignore tests/ut/models/test_qwen2_5_vl_without_padding.py

- name: Upload coverage to Codecov
# only upload coverage when commits merged
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
uses: codecov/codecov-action@v5
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
with:
flags: unittests
name: vllm-ascend
verbose: true
# - name: Upload coverage to Codecov
# # only upload coverage when commits merged
# if: github.event_name == 'push' && github.ref == 'refs/heads/main'
# uses: codecov/codecov-action@v5
# env:
# CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
# with:
# flags: unittests
# name: vllm-ascend
# verbose: true

e2e-light:
name: e2e-light
strategy:
matrix:
vllm_version: [v0.11.2]
vllm_version: [f72a817bdf6bd04b223a9da3af6c4ad1a676a98e]
# Note (yikun): If CI resources are limited, we can split this job into two chained jobs
needs: [lint, changes]
# needs: [lint, changes]
# Only trigger the e2e test after lint has passed and the change is e2e-related in a pull request.
if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' && !contains(github.event.pull_request.labels.*.name, 'ready') }}
#if: ${{ github.event_name == 'pull_request' && needs.lint.result == 'success' && needs.changes.outputs.e2e_tracker == 'true' && !contains(github.event.pull_request.labels.*.name, 'ready') }}
uses: ./.github/workflows/_e2e_test.yaml
with:
vllm: ${{ matrix.vllm_version }}
Expand Down
2 changes: 1 addition & 1 deletion vllm_ascend/attention/attention_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ def __init__(
AscendAttentionMetadataBuilder.reorder_batch_threshold = self.decode_threshold

scheduler_config = vllm_config.scheduler_config
self.chunked_prefill_enabled = scheduler_config.chunked_prefill_enabled
self.chunked_prefill_enabled = scheduler_config.enable_chunked_prefill

def reorder_batch(self, input_batch,
scheduler_output: "SchedulerOutput") -> bool:
Expand Down
4 changes: 2 additions & 2 deletions vllm_ascend/distributed/cpu_offload_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@
from typing import TYPE_CHECKING, Any, Optional, Sequence

import torch
from vllm.attention import AttentionType
from vllm.attention.backends.abstract import AttentionType
from vllm.attention.layer import Attention
from vllm.config import VllmConfig
from vllm.distributed.kv_transfer.kv_connector.v1.base import (
KVConnectorBase_V1, KVConnectorMetadata, KVConnectorRole)
from vllm.distributed.parallel_state import get_pp_group, get_tp_group
from vllm.logger import logger
from vllm.model_executor.layers.fused_moe import FusedMoE
from vllm.utils import logger
from vllm.v1.core.sched.output import SchedulerOutput
from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheSpec,
MLAAttentionSpec)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from collections import defaultdict
from typing import Optional

from vllm.utils import logger, sha256
from vllm.logger import logger, sha256
from vllm.v1.core.block_pool import BlockPool
from vllm.v1.core.kv_cache_utils import (BlockHash, KVCacheBlock,
PrefixCachingMetrics)
Expand Down
2 changes: 1 addition & 1 deletion vllm_ascend/distributed/cpu_offload_manager/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import vllm.envs as envs
import zmq
from vllm.config import KVTransferConfig, VllmConfig
from vllm.utils import logger
from vllm.logger import logger
from vllm.utils.network_utils import make_zmq_socket
from vllm.utils.torch_utils import get_dtype_size
from vllm.v1.kv_cache_interface import AttentionSpec
Expand Down
2 changes: 1 addition & 1 deletion vllm_ascend/distributed/kvpool/ascend_store_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from vllm.distributed.kv_transfer.kv_connector.v1.base import (
KVConnectorBase_V1, KVConnectorMetadata, KVConnectorRole)
from vllm.forward_context import ForwardContext
from vllm.utils import logger
from vllm.logger import logger
from vllm.utils.network_utils import make_zmq_socket
from vllm.v1.core.kv_cache_manager import KVCacheBlocks
from vllm.v1.core.sched.output import SchedulerOutput
Expand Down
2 changes: 1 addition & 1 deletion vllm_ascend/distributed/kvpool/backend/memcache_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import torch
from vllm.config import ParallelConfig
from vllm.utils import logger
from vllm.logger import logger

from vllm_ascend.distributed.kvpool.backend.backend import Backend

Expand Down
2 changes: 1 addition & 1 deletion vllm_ascend/distributed/kvpool/backend/mooncake_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

# Third Party
from vllm.config import ParallelConfig
from vllm.utils import logger
from vllm.logger import logger
from vllm.utils.network_utils import get_ip

from vllm_ascend.distributed.kvpool.backend.backend import Backend
Expand Down
2 changes: 1 addition & 1 deletion vllm_ascend/distributed/kvpool/kv_transfer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from typing import Any, Optional

import torch
from vllm.utils import logger
from vllm.logger import logger
from vllm.v1.core.kv_cache_utils import BlockHash

from vllm_ascend.distributed.kvpool.backend.backend import Backend
Expand Down
Loading
Loading