opea-project · chensuyue · Jun 6, 2025 · May 13, 2025 · May 14, 2025 · May 14, 2025
diff --git a/DocSum/docker_compose/intel/hpu/gaudi/README.md b/DocSum/docker_compose/intel/hpu/gaudi/README.md
@@ -47,6 +47,9 @@ Some HuggingFace resources, such as some models, are only accessible if you have
 To set up environment variables for deploying DocSum services, source the _set_env.sh_ script in this directory:
 
 ```
+# Please set your HUGGINGFACEHUB_API_TOKEN.
+export HUGGINGFACEHUB_API_TOKEN="Your_HuggingFace_API_Token"
+
 source ./set_env.sh
 ```
 

diff --git a/DocSum/docker_compose/set_env.sh b/DocSum/docker_compose/set_env.sh
@@ -6,14 +6,20 @@ pushd "../../" > /dev/null
 source .set_env.sh
 popd > /dev/null
 
+export host_ip=$(hostname -I | awk '{print $1}') # Example: host_ip="192.168.1.1"
 export no_proxy="${no_proxy},${host_ip}" # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
 export http_proxy=$http_proxy
 export https_proxy=$https_proxy
-export host_ip=$(hostname -I | awk '{print $1}') # Example: host_ip="192.168.1.1"
 export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 
 export LLM_ENDPOINT_PORT=8008
 export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
+
+export BLOCK_SIZE=128
+export MAX_NUM_SEQS=256
+export MAX_SEQ_LEN_TO_CAPTURE=2048
+export NUM_CARDS=1
+
 export MAX_INPUT_TOKENS=1024
 export MAX_TOTAL_TOKENS=2048
 

diff --git a/DocSum/docsum.py b/DocSum/docsum.py
@@ -3,6 +3,7 @@
 
 import asyncio
 import base64
+import json
 import os
 import subprocess
 import uuid
@@ -142,11 +143,39 @@ def read_text_from_file(file, save_file_name):
     return file_content
 
 
+def align_generator(self, gen, **kwargs):  # assigned to ServiceOrchestrator.align_generator below; self and kwargs are not used in the body
+    # Re-emit each bytes chunk from `gen` as a 'data: ...\n\n' string. Example chunk in OpenAI response format:
+    # b'data:{"id":"","object":"text_completion","created":1725530204,"model":"meta-llama/Meta-Llama-3-8B-Instruct","system_fingerprint":"2.0.1-native","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},"logprobs":null,"finish_reason":null}]}\n\n'
+    for line in gen:
+        line = line.decode("utf-8")
+        start = line.find("{")  # -1 when the chunk contains no "{"
+        end = line.rfind("}") + 1  # 0 when the chunk contains no "}"
+
+        json_str = line[start:end]  # first "{" through last "}" when both are present
+        try:
+            # Empty or non-JSON chunks, and JSON missing the expected keys, raise here and take the except fallback below.
+            json_data = json.loads(json_str)
+            if "ops" in json_data and "op" in json_data["ops"][0]:
+                if "value" in json_data["ops"][0] and isinstance(json_data["ops"][0]["value"], str):
+                    yield f"data: {repr(json_data['ops'][0]['value'].encode('utf-8'))}\n\n"  # payload is repr() of the UTF-8 bytes
+                else:
+                    pass  # "ops" entry without a string "value": emit nothing
+            elif (
+                json_data["choices"][0]["finish_reason"] != "eos_token"
+                and "content" in json_data["choices"][0]["delta"]
+            ):
+                yield f"data: {repr(json_data['choices'][0]['delta']['content'].encode('utf-8'))}\n\n"
+        except Exception as e:
+            yield f"data: {repr(json_str.encode('utf-8'))}\n\n"  # fallback: emit the unparsed text in the same repr-of-bytes form
+    yield "data: [DONE]\n\n"  # emitted once the input generator is exhausted
+
+
 class DocSumService:
     def __init__(self, host="0.0.0.0", port=8000):
         self.host = host
         self.port = port
         ServiceOrchestrator.align_inputs = align_inputs
+        ServiceOrchestrator.align_generator = align_generator
         self.megaservice = ServiceOrchestrator()
         self.megaservice_text_only = ServiceOrchestrator()
         self.endpoint = str(MegaServiceEndpoint.DOC_SUMMARY)