Skip to content

Commit fbb75b7

Browse files
committed
reverting to no prompt-logprobs support; merged in main
2 parents bae1fb9 + bc6e42a commit fbb75b7

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

72 files changed

+3257
-502
lines changed

.buildkite/run-amd-test.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ docker run \
7575
--network host \
7676
--shm-size=16gb \
7777
--rm \
78+
-e HIP_VISIBLE_DEVICES=0 \
7879
-e HF_TOKEN \
7980
-v ${HF_CACHE}:${HF_MOUNT} \
8081
-e HF_HOME=${HF_MOUNT} \

.buildkite/test-pipeline.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,12 +233,13 @@ steps:
233233
parallelism: 4
234234

235235
- label: Tensorizer Test # 11min
236+
mirror_hardwares: [amd]
236237
soft_fail: true
237238
source_file_dependencies:
238239
- vllm/model_executor/model_loader
239240
- tests/tensorizer_loader
240241
commands:
241-
- apt-get install -y curl libsodium23
242+
- apt-get update && apt-get install -y curl libsodium23
242243
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
243244
- pytest -v -s tensorizer_loader
244245

.github/workflows/mypy.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ jobs:
3535
mypy
3636
mypy tests --follow-imports skip
3737
mypy vllm/attention --follow-imports skip
38-
mypy vllm/core --follow-imports skip
3938
mypy vllm/distributed --follow-imports skip
4039
mypy vllm/engine --follow-imports skip
4140
mypy vllm/executor --follow-imports skip

CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,11 @@ set(VLLM_MOE_EXT_SRC
296296
"csrc/moe/torch_bindings.cpp"
297297
"csrc/moe/topk_softmax_kernels.cu")
298298

299+
if(VLLM_GPU_LANG STREQUAL "CUDA")
300+
list(APPEND VLLM_MOE_EXT_SRC
301+
"csrc/moe/marlin_moe_ops.cu")
302+
endif()
303+
299304
define_gpu_extension_target(
300305
_moe_C
301306
DESTINATION vllm

benchmarks/launch_tgi_server.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ TOKENS=$2
66

77
docker run -e HF_TOKEN=$HF_TOKEN --gpus all --shm-size 1g -p $PORT:80 \
88
-v $PWD/data:/data \
9-
ghcr.io/huggingface/text-generation-inference:1.4.0 \
9+
ghcr.io/huggingface/text-generation-inference:2.2.0 \
1010
--model-id $MODEL \
1111
--sharded false \
1212
--max-input-length 1024 \

0 commit comments

Comments
 (0)