diff --git a/comps/cores/proto/api_protocol.py b/comps/cores/proto/api_protocol.py
index f8fec8d3a9..6a32023ecb 100644
--- a/comps/cores/proto/api_protocol.py
+++ b/comps/cores/proto/api_protocol.py
@@ -195,6 +195,7 @@ class ChatCompletionRequest(BaseModel):
     # top_p: Optional[float] = None  # Priority use openai
     typical_p: Optional[float] = None
     # repetition_penalty: Optional[float] = None
+    timeout: Optional[int] = None
 
     # doc: begin-chat-completion-extra-params
     echo: Optional[bool] = Field(
diff --git a/comps/llms/src/doc-summarization/README.md b/comps/llms/src/doc-summarization/README.md
index 77e0969d2a..287208c977 100644
--- a/comps/llms/src/doc-summarization/README.md
+++ b/comps/llms/src/doc-summarization/README.md
@@ -133,6 +133,8 @@ curl http://${your_ip}:9000/v1/docsum \
 
 "summary_type" is set to be "auto" by default, in this mode we will check input token length, if it exceed `MAX_INPUT_TOKENS`, `summary_type` will automatically be set to `refine` mode, otherwise will be set to `stuff` mode.
 
+With long contexts, the request may get canceled because its generation takes longer than the default `timeout` value (120s for TGI). Increase it as needed.
+
 **summary_type=stuff**
 
 In this mode LLM generate summary based on complete input text. In this case please carefully set `MAX_INPUT_TOKENS` and `MAX_TOTAL_TOKENS` according to your model and device memory, otherwise it may exceed LLM context limit and raise error when meet long context.
@@ -157,7 +159,7 @@ In this mode, default `chunk_size` is set to be `min(MAX_TOTAL_TOKENS - input.ma
 ```bash
 curl http://${your_ip}:9000/v1/docsum \
   -X POST \
-  -d '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "map_reduce", "chunk_size": 2000, "stream":false}' \
+  -d '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "map_reduce", "chunk_size": 2000, "stream":false, "timeout":200}' \
   -H 'Content-Type: application/json'
 ```
 
@@ -170,6 +172,6 @@ In this mode, default `chunk_size` is set to be `min(MAX_TOTAL_TOKENS - 2 * inpu
 ```bash
 curl http://${your_ip}:9000/v1/docsum \
   -X POST \
-  -d '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "refine", "chunk_size": 2000}' \
+  -d '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "refine", "chunk_size": 2000, "timeout":200}' \
   -H 'Content-Type: application/json'
 ```
diff --git a/comps/llms/src/doc-summarization/integrations/tgi.py b/comps/llms/src/doc-summarization/integrations/tgi.py
index 902334bc03..f62813c260 100644
--- a/comps/llms/src/doc-summarization/integrations/tgi.py
+++ b/comps/llms/src/doc-summarization/integrations/tgi.py
@@ -70,6 +70,7 @@ async def invoke(self, input: DocSumChatCompletionRequest):
             temperature=input.temperature if input.temperature else 0.01,
             repetition_penalty=input.repetition_penalty if input.repetition_penalty else 1.03,
             streaming=input.stream,
+            timeout=input.timeout if input.timeout is not None else 120,
             server_kwargs=server_kwargs,
             task="text-generation",
         )
diff --git a/comps/llms/src/doc-summarization/integrations/vllm.py b/comps/llms/src/doc-summarization/integrations/vllm.py
index c292f850e5..a2126c4d52 100644
--- a/comps/llms/src/doc-summarization/integrations/vllm.py
+++ b/comps/llms/src/doc-summarization/integrations/vllm.py
@@ -63,6 +63,7 @@ async def invoke(self, input: DocSumChatCompletionRequest):
             top_p=input.top_p if input.top_p else 0.95,
             streaming=input.stream,
             temperature=input.temperature if input.temperature else 0.01,
+            request_timeout=float(input.timeout) if input.timeout is not None else None,
         )
 
         result = await self.generate(input, self.client)
diff --git a/comps/llms/src/faq-generation/integrations/tgi.py b/comps/llms/src/faq-generation/integrations/tgi.py
index edfa64bcb2..887f1666c1 100644
--- a/comps/llms/src/faq-generation/integrations/tgi.py
+++ b/comps/llms/src/faq-generation/integrations/tgi.py
@@ -67,6 +67,7 @@ async def invoke(self, input: ChatCompletionRequest):
             temperature=input.temperature if input.temperature else 0.01,
             repetition_penalty=input.repetition_penalty if input.repetition_penalty else 1.03,
             streaming=input.stream,
+            timeout=input.timeout if input.timeout is not None else 120,
             server_kwargs=server_kwargs,
         )
         result = await self.generate(input, self.client)
diff --git a/comps/llms/src/faq-generation/integrations/vllm.py b/comps/llms/src/faq-generation/integrations/vllm.py
index bf891ea7ff..a812b2b5fa 100644
--- a/comps/llms/src/faq-generation/integrations/vllm.py
+++ b/comps/llms/src/faq-generation/integrations/vllm.py
@@ -60,6 +60,7 @@ async def invoke(self, input: ChatCompletionRequest):
             top_p=input.top_p if input.top_p else 0.95,
             streaming=input.stream,
             temperature=input.temperature if input.temperature else 0.01,
+            request_timeout=float(input.timeout) if input.timeout is not None else None,
         )
 
         result = await self.generate(input, self.client)
diff --git a/tests/llms/test_llms_doc-summarization_tgi.sh b/tests/llms/test_llms_doc-summarization_tgi.sh
index 16e2018548..18f9b0da86 100644
--- a/tests/llms/test_llms_doc-summarization_tgi.sh
+++ b/tests/llms/test_llms_doc-summarization_tgi.sh
@@ -125,7 +125,7 @@ function validate_microservices() {
         'text' \
         "docsum-tgi" \
         "docsum-tgi" \
-        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "map_reduce", "chunk_size": 2000, "stream":false}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "map_reduce", "chunk_size": 2000, "stream":false, "timeout":200}'
 
     echo "Validate refine mode..."
     validate_services \
@@ -133,7 +133,7 @@ function validate_microservices() {
         'text' \
         "docsum-tgi" \
         "docsum-tgi" \
-        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "refine", "chunk_size": 2000}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "refine", "chunk_size": 2000, "timeout":200}'
 }
 
 function stop_docker() {
diff --git a/tests/llms/test_llms_doc-summarization_tgi_on_intel_hpu.sh b/tests/llms/test_llms_doc-summarization_tgi_on_intel_hpu.sh
index b8c97f5b60..bc6cd03b0f 100644
--- a/tests/llms/test_llms_doc-summarization_tgi_on_intel_hpu.sh
+++ b/tests/llms/test_llms_doc-summarization_tgi_on_intel_hpu.sh
@@ -126,7 +126,7 @@ function validate_microservices() {
         'text' \
         "docsum-tgi-gaudi" \
         "docsum-tgi-gaudi" \
-        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "map_reduce", "chunk_size": 2000, "stream":false}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "map_reduce", "chunk_size": 2000, "stream":false, "timeout":200}'
 
     echo "Validate refine mode..."
     validate_services \
@@ -134,7 +134,7 @@ function validate_microservices() {
         'text' \
         "docsum-tgi-gaudi" \
         "docsum-tgi-gaudi" \
-        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "refine", "chunk_size": 2000}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "refine", "chunk_size": 2000, "timeout":200}'
 }
 
 function stop_docker() {
diff --git a/tests/llms/test_llms_doc-summarization_vllm.sh b/tests/llms/test_llms_doc-summarization_vllm.sh
index 42e79aa1e9..88cbef9c92 100644
--- a/tests/llms/test_llms_doc-summarization_vllm.sh
+++ b/tests/llms/test_llms_doc-summarization_vllm.sh
@@ -140,7 +140,7 @@ function validate_microservices() {
         'text' \
         "docsum-vllm" \
         "docsum-vllm" \
-        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "map_reduce", "chunk_size": 2000, "stream":false}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "map_reduce", "chunk_size": 2000, "stream":false, "timeout":200}'
 
     echo "Validate refine mode..."
     validate_services \
@@ -148,7 +148,7 @@ function validate_microservices() {
         'text' \
         "docsum-vllm" \
         "docsum-vllm" \
-        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "refine", "chunk_size": 2000}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "refine", "chunk_size": 2000, "timeout":200}'
 }
 
 function stop_docker() {
diff --git a/tests/llms/test_llms_doc-summarization_vllm_on_intel_hpu.sh b/tests/llms/test_llms_doc-summarization_vllm_on_intel_hpu.sh
index a6096bd309..31bc13d693 100644
--- a/tests/llms/test_llms_doc-summarization_vllm_on_intel_hpu.sh
+++ b/tests/llms/test_llms_doc-summarization_vllm_on_intel_hpu.sh
@@ -139,7 +139,7 @@ function validate_microservices() {
         'text' \
         "docsum-vllm-gaudi" \
         "docsum-vllm-gaudi" \
-        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "map_reduce", "chunk_size": 2000, "stream":false}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "map_reduce", "chunk_size": 2000, "stream":false, "timeout":200}'
 
     echo "Validate refine mode..."
     validate_services \
@@ -147,7 +147,7 @@ function validate_microservices() {
         'text' \
         "docsum-vllm-gaudi" \
         "docsum-vllm-gaudi" \
-        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "refine", "chunk_size": 2000}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "refine", "chunk_size": 2000, "timeout":200}'
 }
 
 function stop_docker() {