From 2d4d7f2048e2d0498deaaf90f511c538d48a134d Mon Sep 17 00:00:00 2001
From: Isotr0py
Date: Mon, 27 Oct 2025 15:30:07 +0800
Subject: [PATCH 1/2] add online serving usage for deepseek-ocr

Signed-off-by: Isotr0py
---
 DeepSeek/DeepSeek-OCR.md | 54 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/DeepSeek/DeepSeek-OCR.md b/DeepSeek/DeepSeek-OCR.md
index 0456eb7..30d3106 100644
--- a/DeepSeek/DeepSeek-OCR.md
+++ b/DeepSeek/DeepSeek-OCR.md
@@ -13,6 +13,7 @@ uv pip install -U vllm --pre --extra-index-url https://wheels.vllm.ai/nightly
 ```
 
 ## Running DeepSeek-OCR
+### Offline OCR tasks
 
 In this guide, we demonstrate how to set up DeepSeek-OCR for offline OCR batch processing tasks.
 
@@ -64,6 +65,59 @@ for output in model_outputs:
     print(output.outputs[0].text)
 ```
 
+### Online OCR serving
+In this section, we demonstrate how to set up DeepSeek-OCR for online OCR serving with an OpenAI-compatible API server.
+
+```bash
+vllm serve deepseek-ai/DeepSeek-OCR --logits_processors vllm.model_executor.models.deepseek_ocr.NGramPerReqLogitsProcessor
+```
+
+```python3
+import time
+from openai import OpenAI
+
+client = OpenAI(
+    api_key="EMPTY",
+    base_url="http://localhost:8000/v1",
+    timeout=3600
+)
+
+messages = [
+    {
+        "role": "user",
+        "content": [
+            {
+                "type": "image_url",
+                "image_url": {
+                    "url": "https://ofasys-multimodal-wlcb-3-toshanghai.oss-accelerate.aliyuncs.com/wpf272043/keepme/image/receipt.png"
+                }
+            },
+            {
+                "type": "text",
+                "text": "Free OCR."
+            }
+        ]
+    }
+]
+
+start = time.time()
+response = client.chat.completions.create(
+    model="deepseek-ai/DeepSeek-OCR",
+    messages=messages,
+    max_tokens=2048,
+    extra_body={
+        "skip_special_tokens": False,
+        # args used to control the custom logits processor
+        "vllm_xargs": {
+            "ngram_size": 30,
+            "window_size": 90,
+        },
+    },
+)
+print(f"Response took {time.time() - start:.2f}s")
+print(f"Generated text: {response.choices[0].message.content}")
+```
+
 ## Configuration Tips
 - **It's important to use the custom logits processor** along with the model for the optimal OCR and markdown generation performance.
 - Unlike multi-turn chat use cases, we do not expect OCR tasks to benefit significantly from prefix caching or image reuse, therefore it's recommended to turn off these features to avoid unnecessary hashing and caching.

From c2678d517bf20849b3078bca9306355566ba10a9 Mon Sep 17 00:00:00 2001
From: Isotr0py
Date: Mon, 27 Oct 2025 16:21:55 +0800
Subject: [PATCH 2/2] address comment

Signed-off-by: Isotr0py
---
 DeepSeek/DeepSeek-OCR.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/DeepSeek/DeepSeek-OCR.md b/DeepSeek/DeepSeek-OCR.md
index 30d3106..790479e 100644
--- a/DeepSeek/DeepSeek-OCR.md
+++ b/DeepSeek/DeepSeek-OCR.md
@@ -69,7 +69,7 @@ for output in model_outputs:
 In this section, we demonstrate how to set up DeepSeek-OCR for online OCR serving with an OpenAI-compatible API server.
 
 ```bash
-vllm serve deepseek-ai/DeepSeek-OCR --logits_processors vllm.model_executor.models.deepseek_ocr.NGramPerReqLogitsProcessor
+vllm serve deepseek-ai/DeepSeek-OCR --logits_processors vllm.model_executor.models.deepseek_ocr.NGramPerReqLogitsProcessor --no-enable-prefix-caching --mm-processor-cache-gb 0
 ```
 
 ```python3
@@ -105,12 +105,15 @@ response = client.chat.completions.create(
     model="deepseek-ai/DeepSeek-OCR",
     messages=messages,
     max_tokens=2048,
+    temperature=0.0,
     extra_body={
         "skip_special_tokens": False,
         # args used to control the custom logits processor
         "vllm_xargs": {
             "ngram_size": 30,
             "window_size": 90,
+            # whitelist: <td>, </td>
+            "whitelist_token_ids": [128821, 128822],
         },
     },
 )
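
The `whitelist_token_ids` added above exempt specific tokens from the n-gram repetition penalty, presumably so that the heavily repeated table-cell tags in large tables are not suppressed; the inline comment indicates IDs 128821 and 128822 correspond to `<td>` and `</td>`. As a minimal sketch (not part of the patch above), assuming the `deepseek-ai/DeepSeek-OCR` tokenizer is reachable on the Hugging Face Hub and that `trust_remote_code=True` is acceptable, the mapping can be confirmed like this:

```python3
# Decode the whitelisted token IDs to confirm what they represent
# before pinning them in the serving request. Illustrative only;
# assumes network access to the Hugging Face Hub.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "deepseek-ai/DeepSeek-OCR", trust_remote_code=True
)
for token_id in (128821, 128822):
    print(token_id, "->", repr(tokenizer.decode([token_id])))
```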