Skip to content

Commit 737bb94

Browse files
authored
add seq2seq streaming integ test (deepjavalibrary#724)
1 parent eb0d5f0 commit 737bb94

File tree

3 files changed

+28
-1
lines changed

3 files changed

+28
-1
lines changed

.github/workflows/llm_integration.yml

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -166,6 +166,17 @@ jobs:
166166
python3 llm/client.py huggingface bigscience/bloom-3b
167167
rm -rf docker_env
168168
docker rm -f $(docker ps -aq)
169+
- name: Test streaming t5-large
170+
working-directory: tests/integration
171+
run: |
172+
rm -rf models
173+
echo -en "CUDA_VISIBLE_DEVICES=1" > docker_env
174+
python3 llm/prepare.py huggingface t5-large
175+
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
176+
serve
177+
python3 llm/client.py huggingface t5-large
178+
rm -rf docker_env
179+
docker rm -f $(docker ps -aq)
169180
- name: On fail step
170181
if: ${{ failure() }}
171182
working-directory: tests/integration

tests/integration/llm/client.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -106,6 +106,13 @@ def compute_model_name_hash(model_name):
106106
"worker": 1,
107107
"stream_output": True,
108108
},
109+
"t5-large": {
110+
"max_memory_per_gpu": [5.0],
111+
"batch_size": [1],
112+
"seq_length": [32],
113+
"worker": 1,
114+
"stream_output": True,
115+
},
109116
"no-code/nomic-ai/gpt4all-j": {
110117
"max_memory_per_gpu": [10.0, 12.0],
111118
"batch_size": [1, 4],
@@ -456,7 +463,10 @@ def test_handler(model, model_spec):
456463
model_name=spec.get("model_name", "test"))
457464
for i, batch_size in enumerate(spec["batch_size"]):
458465
for seq_length in spec["seq_length"]:
459-
req = {"inputs": batch_generation(batch_size)}
466+
if "t5" in model:
467+
req = {"inputs": t5_batch_generation(batch_size)}
468+
else:
469+
req = {"inputs": batch_generation(batch_size)}
460470
params = {"max_new_tokens": seq_length}
461471
req["parameters"] = params
462472
logging.info(f"req {req}")

tests/integration/llm/prepare.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -114,6 +114,12 @@
114114
"option.enable_streaming": True,
115115
"gpu.maxWorkers": 1,
116116
},
117+
"t5-large": {
118+
"option.model_id": "t5-large",
119+
"option.tensor_parallel_degree": 1,
120+
"option.device_map": "auto",
121+
"option.enable_streaming": True,
122+
},
117123
}
118124

119125
ds_handler_list = {

0 commit comments

Comments
 (0)