5 changes: 4 additions & 1 deletion .github/workflows/ruff.yml
@@ -25,7 +25,10 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
-pip install ruff==0.1.5
+pip install ruff==0.1.5 codespell==2.2.6 tomli==2.0.1
- name: Analysing the code with ruff
run: |
ruff vllm tests
+- name: Spelling check with codespell
+run: |
+codespell --toml pyproject.toml
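The new workflow step mirrors the local format.sh hook added below: it installs the pinned codespell together with tomli (codespell needs a TOML parser to read pyproject.toml, and the standard-library tomllib only exists from Python 3.11 onward) and then runs `codespell --toml pyproject.toml` across the repository.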
2 changes: 1 addition & 1 deletion benchmarks/benchmark_serving.py
@@ -375,7 +375,7 @@ def main(args: argparse.Namespace):
parser.add_argument(
"--disable-tqdm",
action="store_true",
-help="Specify to disbale tqdm progress bar.",
+help="Specify to disable tqdm progress bar.",
)
parser.add_argument(
"--save-result",
51 changes: 48 additions & 3 deletions format.sh
@@ -24,6 +24,7 @@ builtin cd "$ROOT" || exit 1
YAPF_VERSION=$(yapf --version | awk '{print $2}')
RUFF_VERSION=$(ruff --version | awk '{print $2}')
MYPY_VERSION=$(mypy --version | awk '{print $2}')
+CODESPELL_VERSION=$(codespell --version)

# # params: tool name, tool version, required version
tool_version_check() {
@@ -36,6 +37,7 @@ tool_version_check() {
tool_version_check "yapf" $YAPF_VERSION "$(grep yapf requirements-dev.txt | cut -d'=' -f3)"
tool_version_check "ruff" $RUFF_VERSION "$(grep "ruff==" requirements-dev.txt | cut -d'=' -f3)"
tool_version_check "mypy" "$MYPY_VERSION" "$(grep mypy requirements-dev.txt | cut -d'=' -f3)"
+tool_version_check "codespell" "$CODESPELL_VERSION" "$(grep codespell requirements-dev.txt | cut -d'=' -f3)"

YAPF_FLAGS=(
'--recursive'
@@ -93,6 +95,47 @@ echo 'vLLM yapf: Done'
# echo 'vLLM mypy:'
# mypy

+# check spelling of specified files
+spell_check() {
+codespell "$@"
+}
+
+spell_check_all() {
+codespell --toml pyproject.toml
+}
+
+# Spelling check of files that differ from the main branch.
+spell_check_changed() {
+# The `if` guard ensures that the list of filenames is not empty, which
+# could cause codespell to receive 0 positional arguments, making it hang
+# waiting for STDIN.
+#
+# `--diff-filter=ACM` and $MERGEBASE ensure we only check files that
+# exist on both branches.
+MERGEBASE="$(git merge-base origin/main HEAD)"
+
+if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then
+git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs \
+codespell
+fi
+}
+
+# Run Codespell
+## This flag runs spell check of individual files. --files *must* be the first command line
+## arg to use this option.
+if [[ "$1" == '--files' ]]; then
+spell_check "${@:2}"
+# If `--all` is passed, then any further arguments are ignored and the
+# entire repository is spell-checked.
+elif [[ "$1" == '--all' ]]; then
+spell_check_all
+else
+# Check spelling only of the files that differ from the main branch.
+spell_check_changed
+fi
+echo 'vLLM codespell: Done'


# Lint specified files
lint() {
ruff "$@"
@@ -117,9 +160,9 @@ lint_changed() {
}

# Run Ruff
-echo 'vLLM Ruff:'
-## This flag lints individual files. --files *must* be the first command line
-## arg to use this option.
+echo 'vLLM ruff:'
+### This flag lints individual files. --files *must* be the first command line
+### arg to use this option.
if [[ "$1" == '--files' ]]; then
lint "${@:2}"
# If `--all` is passed, then any further arguments are ignored and the
@@ -139,3 +182,5 @@ if ! git diff --quiet &>/dev/null; then

exit 1
fi
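Taken together, the new spell-check entry points mirror the existing ruff ones: `./format.sh --files <paths...>` checks specific files, `./format.sh --all` checks everything allowed by the codespell settings in pyproject.toml, and a plain `./format.sh` run checks only the files that differ from origin/main.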


8 changes: 0 additions & 8 deletions mypy.ini

This file was deleted.

18 changes: 18 additions & 0 deletions pyproject.toml
@@ -31,4 +31,22 @@ ignore = [
"E731",
# line too long, handled by black formatting
"E501",
+# .strip() with multi-character strings
+"B005",
+# Loop control variable not used within loop body
+"B007",
]

+[tool.mypy]
+python_version = "3.8"
+
+ignore_missing_imports = true
+
+files = "vllm"
+# TODO(woosuk): Include the code from Megatron and HuggingFace.
+exclude = "vllm/model_executor/parallel_utils/|vllm/model_executor/models/"
+
+
+[tool.codespell]
+ignore-words-list = "dout, te, indicies"
+skip = "./tests/prompts"
2 changes: 2 additions & 0 deletions requirements-dev.txt
@@ -1,7 +1,9 @@
# formatting
yapf==0.32.0
toml==0.10.2
+tomli==2.0.1
ruff==0.1.5
+codespell==2.2.6

# type checking
mypy==0.991
2 changes: 1 addition & 1 deletion tests/lora/test_layers.py
@@ -279,7 +279,7 @@ def create_random_embedding_layer():
256,
org_num_embeddings=512)
expanded_embedding.weight.data[:512, :] = embedding_data
-# We need to deepcopy the embedding as it will be modifed
+# We need to deepcopy the embedding as it will be modified
# in place
lora_embedding = VocabParallelEmbeddingWithLoRA(
deepcopy(expanded_embedding))
4 changes: 2 additions & 2 deletions tests/lora/test_llama.py
@@ -15,7 +15,7 @@ def do_sample(llm, lora_path: str, lora_id: int):
"[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_95 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one mora for a low tone mora with a gloss of /˩okiru/ [òkìɽɯ́]? [/user] [assistant]",
"[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE candidate (people_id VARCHAR, unsure_rate INTEGER); CREATE TABLE people (sex VARCHAR, people_id VARCHAR)\n\n question: which gender got the highest average uncertain ratio. [/user] [assistant]",
"[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_60 (pick INTEGER, former_wnba_team VARCHAR)\n\n question: What pick was a player that previously played for the Minnesota Lynx? [/user] [assistant]",
-"[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the womens doubles for werner schlager [/user] [assistant]"
+"[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]"
]
sampling_params = vllm.SamplingParams(temperature=0,
max_tokens=256,
@@ -53,7 +53,7 @@ def test_llama_lora(sql_lora_files, tp_size):
"\n\n answer: 1\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_96 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one mora for a high tone mora with a gloss of /˧kot/ [kòt]? [/user] [assistant]\n\n answer: 2\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_97 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one mora for a high tone mora with a gloss of /˧kot/ [kòt]? [/user] [assistant]\n\n answer: 2\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_98 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one m",
" Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE candidate (people_id VARCHAR, unsure_rate INTEGER); CREATE TABLE people (sex VARCHAR, people_id VARCHAR)\n\n question: which gender got the highest average uncertain ratio. ",
" Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_60 (pick INTEGER, former_wnba_team VARCHAR)\n\n question: What pick was a player that previously played for the Minnesota Lynx? ",
-"\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the womens doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the womens doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the womens doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE",
+"\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE",
]
expected_lora_output = [
" SELECT icao FROM table_name_74 WHERE airport = 'lilongwe international airport' ",
2 changes: 1 addition & 1 deletion vllm/core/block_manager.py
@@ -178,7 +178,7 @@ def append_slot(self, seq: Sequence) -> Optional[Tuple[int, int]]:
if len(block_table) < len(logical_blocks):
if (self.block_sliding_window
and len(block_table) >= self.block_sliding_window):
-# re-use a block
+# reuse a block
block_table.append(block_table[len(block_table) %
self.block_sliding_window])
else:
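For context on the line being touched: with a sliding window of W blocks, any logical block past the window reuses the physical slot at index `len(block_table) % W`. A minimal standalone sketch of that arithmetic (hypothetical function, not vLLM's BlockManager):

    def sliding_window_block_table(num_logical_blocks: int, window: int) -> list:
        """Illustrative only: block i beyond the window reuses slot i % window."""
        table = []
        for _ in range(num_logical_blocks):
            if len(table) >= window:
                table.append(table[len(table) % window])  # reuse a block
            else:
                table.append(len(table))  # pretend fresh block ids are 0, 1, 2, ...
        return table

    print(sliding_window_block_table(6, 3))  # [0, 1, 2, 0, 1, 2]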
2 changes: 1 addition & 1 deletion vllm/core/scheduler.py
@@ -158,7 +158,7 @@ def get_num_unfinished_seq_groups(self) -> int:
return len(self.waiting) + len(self.running) + len(self.swapped)

def _schedule(self) -> SchedulerOutputs:
-# Blocks that need to be swaped or copied before model execution.
+# Blocks that need to be swapped or copied before model execution.
blocks_to_swap_in: Dict[int, int] = {}
blocks_to_swap_out: Dict[int, int] = {}
blocks_to_copy: Dict[int, List[int]] = {}
2 changes: 1 addition & 1 deletion vllm/lora/punica.py
@@ -87,7 +87,7 @@ def add_lora(y: torch.Tensor,
r = wb_t_all.size(-1)
if buffer is None:
# We set the buffer to be float32 by default to avoid
-# numerical innacuracies that would otherwise happen
+# numerical inaccuracies that would otherwise happen
# due to downcasting.
buffer = torch.zeros((x.size(0), r),
dtype=torch.float32,
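The comment fixed here deserves a concrete illustration. A minimal sketch (assuming PyTorch; not vLLM code) of why a float32 buffer avoids the downcasting error that accumulating many small fp16 contributions would incur:

    import torch

    small = torch.tensor(0.01, dtype=torch.float16)   # stored as ~0.010010
    acc_fp16 = torch.tensor(0.0, dtype=torch.float16)
    acc_fp32 = torch.tensor(0.0, dtype=torch.float32)
    for _ in range(5000):
        acc_fp16 += small          # result is rounded to fp16 after every add
        acc_fp32 += small.float()  # the fp32 buffer keeps the small increments
    print(acc_fp16.item())  # plateaus at 32.0 once the increment rounds away
    print(acc_fp32.item())  # ~50.05 (5000 * 0.010010)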
2 changes: 1 addition & 1 deletion vllm/model_executor/layers/triton_kernel/prefix_prefill.py
@@ -537,7 +537,7 @@ def _fwd_kernel_alibi(
alibi_start_q = tl.arange(
0, BLOCK_M) + block_start_loc + cur_batch_ctx_len
alibi_start_k = cur_batch_ctx_len
-# # init debuger
+# # init debugger
# offset_db_q = tl.arange(0, BLOCK_M) + block_start_loc
# offset_db_k = tl.arange(0, BLOCK_N)
# calc q[BLOCK_M, BLOCK_MODEL] mul k[prefix_len: , BLOCK_DMODEL]
2 changes: 1 addition & 1 deletion vllm/model_executor/models/decilm.py
@@ -41,7 +41,7 @@ class DeciLMForCausalLM(LlamaForCausalLM):
Based on the llama executor.

The main difference is that DeciLM uses Variable Grouped Query Attention.
-The constant number of GQA heads in the decoder is overriden with a value
+The constant number of GQA heads in the decoder is overridden with a value
per layer.

Usually, in the HuggingFace implementation, instead of
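As background for the docstring being fixed: "variable GQA" means each decoder layer can use its own number of KV heads rather than one model-wide constant. A hedged sketch with hypothetical per-layer values (the real ones come from the DeciLM HuggingFace config):

    # Hypothetical head counts; each layer's KV heads are shared by a group
    # of query heads, and the group size varies per layer.
    num_attention_heads = 32
    num_kv_heads_per_layer = [4, 4, 2, 1]

    for layer_idx, num_kv_heads in enumerate(num_kv_heads_per_layer):
        group_size = num_attention_heads // num_kv_heads
        print(f"layer {layer_idx}: {num_kv_heads} KV heads, "
              f"each shared by {group_size} query heads")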
4 changes: 2 additions & 2 deletions vllm/model_executor/parallel_utils/custom_all_reduce.py
@@ -36,14 +36,14 @@ def init_custom_ar() -> None:
if world_size not in _SUPPORTED_WORLD_SIZES:
logger.warn(
"Custom allreduce is disabled due to an unsupported world size: "
-"%d. Supported world sizes: %s. To slience this warning, specify"
+"%d. Supported world sizes: %s. To silence this warning, specify "
"disable_custom_all_reduce=True explicitly.", world_size,
str(_SUPPORTED_WORLD_SIZES))
return
if not _can_p2p(rank, world_size):
logger.warn(
"Custom allreduce is disabled because your platform lacks GPU P2P"
-" capability. To slience this warning, specify"
+" capability. To silence this warning, specify "
"disable_custom_all_reduce=True explicitly.")
return
_CA_HANDLE = CustomAllreduce(rank, world_size)
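Both corrected warnings point to the same escape hatch. A hedged usage sketch (assumes a multi-GPU host, and that the entry point forwards this engine argument as the warning text indicates):

    from vllm import LLM

    # Explicitly disable custom all-reduce to silence the warning on
    # unsupported world sizes or on hosts without GPU P2P capability.
    llm = LLM(model="facebook/opt-125m",
              tensor_parallel_size=2,  # assumes two GPUs are available
              disable_custom_all_reduce=True)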
2 changes: 1 addition & 1 deletion vllm/model_executor/parallel_utils/parallel_state.py
@@ -189,7 +189,7 @@ def get_pipeline_model_parallel_next_rank():


def get_pipeline_model_parallel_prev_rank():
-"""Return the global rank that preceeds the caller in the pipeline"""
+"""Return the global rank that precedes the caller in the pipeline"""
assert _PIPELINE_GLOBAL_RANKS is not None, (
"Pipeline parallel group is not initialized")
rank_in_pipeline = get_pipeline_model_parallel_rank()
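A small illustration of the ring arithmetic behind these helpers (hypothetical rank list, not the vLLM module itself): the previous stage is found with modular indexing, so stage 0 wraps around to the last stage.

    _PIPELINE_GLOBAL_RANKS = [4, 5, 6, 7]  # hypothetical: four pipeline stages

    def prev_rank(rank_in_pipeline: int) -> int:
        world = len(_PIPELINE_GLOBAL_RANKS)
        return _PIPELINE_GLOBAL_RANKS[(rank_in_pipeline - 1) % world]

    print(prev_rank(0))  # 7: stage 0 wraps around to the last stage
    print(prev_rank(2))  # 5: the stage immediately before stage 2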
2 changes: 1 addition & 1 deletion vllm/utils.py
@@ -204,7 +204,7 @@ def _generate_random_fp8_e5m2(
# NOTE(zhaoyang): Because of how the fp8 data type represents NaN and Inf,
# Inf or NaN may occur if we directly use torch.randint to generate
# random fp8 data.
-# For example, s.11111.00 in fp8e5m2 format repesents Inf.
+# For example, s.11111.00 in fp8e5m2 format represents Inf.
# | E4M3 | E5M2
#-----|-------------|-------------------
# Inf | N/A | s.11111.00
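To make the bit-pattern comment concrete: e5m2 packs 1 sign, 5 exponent, and 2 mantissa bits, and an all-ones exponent encodes Inf (mantissa 00) or NaN. A self-contained sketch (plain Python, not the vLLM helper) classifying such patterns, which is exactly why raw torch.randint bytes are unsafe here:

    def classify_e5m2(bits: int) -> str:
        """Classify an 8-bit e5m2 pattern laid out as s.eeeee.mm."""
        exp = (bits >> 2) & 0b11111
        mant = bits & 0b11
        if exp == 0b11111:
            return "inf" if mant == 0 else "nan"
        return "finite"

    print(classify_e5m2(0b0_11111_00))  # inf: the s.11111.00 pattern above
    print(classify_e5m2(0b1_11111_01))  # nan
    print(classify_e5m2(0b0_11110_11))  # finite (largest normal magnitude)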