Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions DocSum/docker_compose/intel/hpu/gaudi/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ Some HuggingFace resources, such as some models, are only accessible if you have
To set up environment variables for deploying DocSum services, source the _set_env.sh_ script in this directory:

```
# Please set your HUGGINGFACEHUB_API_TOKEN.
export HUGGINGFACEHUB_API_TOKEN="Your_HuggingFace_API_Token"

source ./set_env.sh
```

Expand Down
8 changes: 7 additions & 1 deletion DocSum/docker_compose/set_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,20 @@ pushd "../../" > /dev/null
source .set_env.sh
popd > /dev/null

# --- Network / proxy settings -------------------------------------------
# host_ip is the first address printed by `hostname -I`.
export host_ip=$(hostname -I | awk '{print $1}') # Example: host_ip="192.168.1.1"
# Append this host's IP to whatever no_proxy already holds.
export no_proxy="${no_proxy},${host_ip}" # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
# Re-export the caller's proxy variables unchanged so child processes inherit them.
export http_proxy=$http_proxy
export https_proxy=$https_proxy
# NOTE(review): host_ip is exported a second time with the identical command;
# this looks redundant with the export above -- confirm and drop one of them.
export host_ip=$(hostname -I | awk '{print $1}') # Example: host_ip="192.168.1.1"
# Pass through the token from the calling shell; it must be set before sourcing.
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}

# --- LLM endpoint --------------------------------------------------------
export LLM_ENDPOINT_PORT=8008
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"

# --- Serving parameters --------------------------------------------------
# NOTE(review): presumably consumed by the LLM serving container defined in
# the compose files -- verify which service reads each of these.
export BLOCK_SIZE=128
export MAX_NUM_SEQS=256
export MAX_SEQ_LEN_TO_CAPTURE=2048
export NUM_CARDS=1

# --- Token limits --------------------------------------------------------
# NOTE(review): names suggest a prompt-length cap and a prompt+generation cap;
# confirm against the consuming service.
export MAX_INPUT_TOKENS=1024
export MAX_TOTAL_TOKENS=2048

Expand Down
29 changes: 29 additions & 0 deletions DocSum/docsum.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import asyncio
import base64
import json
import os
import subprocess
import uuid
Expand Down Expand Up @@ -142,11 +143,39 @@ def read_text_from_file(file, save_file_name):
return file_content


def _chunk_payload(json_str):
    """Return the UTF-8 bytes to forward for one JSON chunk, or None to skip it.

    Two chunk shapes are recognised:

    * ``{"ops": [{"op": ..., "value": <str>}, ...]}`` -- the first op's
      ``value`` is forwarded when it is a string; otherwise the chunk is
      skipped.
    * ``{"choices": [{"delta": {"content": ...}, "finish_reason": ...}]}`` --
      the delta ``content`` is forwarded unless ``finish_reason`` is
      ``"eos_token"`` or the delta has no ``content`` key.

    Raises:
        ValueError, LookupError, TypeError, AttributeError: when *json_str*
            is not valid JSON or does not have one of the shapes above
            (for example an empty chunk, or a ``content`` of ``null``).
    """
    json_data = json.loads(json_str)

    if "ops" in json_data and "op" in json_data["ops"][0]:
        first_op = json_data["ops"][0]
        if "value" in first_op and isinstance(first_op["value"], str):
            return first_op["value"].encode("utf-8")
        return None

    choice = json_data["choices"][0]
    if choice["finish_reason"] != "eos_token" and "content" in choice["delta"]:
        return choice["delta"]["content"].encode("utf-8")
    return None


def align_generator(self, gen, **kwargs):
    """Re-emit a streamed LLM response as ``data: ...`` server-sent events.

    Each item of *gen* is decoded as UTF-8, the text between the first ``{``
    and the last ``}`` is parsed as JSON, and the generated text is yielded as
    ``data: <repr of its UTF-8 bytes>`` followed by a blank line. A final
    ``data: [DONE]`` event is always yielded once *gen* is exhausted.

    Args:
        self: Unused; present because this function is installed as a method.
        gen: Iterable of ``bytes`` chunks.
        **kwargs: Accepted for interface compatibility and ignored.

    Yields:
        str: One event string per forwarded chunk, then ``"data: [DONE]\\n\\n"``.
    """
    # OpenAI response format
    # b'data:{"id":"","object":"text_completion","created":1725530204,"model":"meta-llama/Meta-Llama-3-8B-Instruct","system_fingerprint":"2.0.1-native","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},"logprobs":null,"finish_reason":null}]}\n\n'
    for line in gen:
        line = line.decode("utf-8")
        # Strip the "data:" prefix and trailing newlines by keeping only the
        # outermost {...} span. With no braces this yields "" on purpose.
        json_str = line[line.find("{") : line.rfind("}") + 1]

        try:
            payload = _chunk_payload(json_str)
        except (ValueError, LookupError, TypeError, AttributeError):
            # Sometimes an empty or unexpected chunk arrives: fall back to
            # forwarding the raw extracted text instead of failing the stream.
            payload = json_str.encode("utf-8")

        # Yield outside the try so an exception raised at the yield point
        # (e.g. thrown in by the consumer) is not mistaken for a bad chunk.
        if payload is not None:
            yield f"data: {payload!r}\n\n"

    yield "data: [DONE]\n\n"


class DocSumService:
def __init__(self, host="0.0.0.0", port=8000):
self.host = host
self.port = port
ServiceOrchestrator.align_inputs = align_inputs
ServiceOrchestrator.align_generator = align_generator
self.megaservice = ServiceOrchestrator()
self.megaservice_text_only = ServiceOrchestrator()
self.endpoint = str(MegaServiceEndpoint.DOC_SUMMARY)
Expand Down
Loading