
Commit c478c91

Merge pull request #2 from McLavish/feature/bert-inference
Feature/bert inference
2 parents e9916db + 25fd1d9 commit c478c91

File tree

15 files changed: +260 −4 lines changed


.gitmodules

Lines changed: 1 addition & 1 deletion
@@ -3,4 +3,4 @@
 	url = https://github.com/mcopik/pypapi.git
 [submodule "benchmarks-data"]
 	path = benchmarks-data
-	url = https://github.com/spcl/serverless-benchmarks-data.git
+	url = https://github.com/McLavish/serverless-benchmarks-data-dphpc.git

.mypy.ini

Lines changed: 3 additions & 0 deletions
@@ -3,6 +3,9 @@
 [mypy-docker]
 ignore_missing_imports = True
 
+[mypy-docker.*]
+ignore_missing_imports = True
+
 [mypy-tzlocal]
 ignore_missing_imports = True

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+{
+    "timeout": 60,
+    "memory": 512,
+    "languages": ["python"],
+    "modules": ["storage"]
+}
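
This is the benchmark's config file. Assuming it follows the usual SeBS conventions, `timeout` is in seconds and `memory` in MB; a minimal sketch of reading it (the file path is hypothetical):

import json

# Hypothetical path; in SeBS the config sits in the benchmark's directory.
with open("config.json") as f:
    cfg = json.load(f)

print(f'{cfg["timeout"]} s timeout, {cfg["memory"]} MB memory')
print("languages:", cfg["languages"], "| modules:", cfg["modules"])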
Lines changed: 33 additions & 0 deletions
@@ -0,0 +1,33 @@
+import os
+
+
+def buckets_count():
+    # model bucket and text bucket
+    return (2, 0)
+
+
+def upload_files(data_root, data_dir, upload_func):
+    for root, _, files in os.walk(data_dir):
+        prefix = os.path.relpath(root, data_root)
+        for file in files:
+            filepath = os.path.join(root, file)
+            relative_key = os.path.join(prefix, file)
+            upload_func(0, relative_key, filepath)
+
+
+def generate_input(
+    data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func
+):
+    model_archive = "bert-tiny-onnx.tar.gz"
+    upload_func(0, model_archive, os.path.join(data_dir, "model", model_archive))
+
+    text_filename = "sentences.jsonl"
+    upload_func(1, text_filename, os.path.join(data_dir, "text", text_filename))
+
+    input_config = {"object": {}, "bucket": {}}
+    input_config["object"]["model"] = model_archive
+    input_config["object"]["input"] = text_filename
+    input_config["bucket"]["bucket"] = benchmarks_bucket
+    input_config["bucket"]["model"] = input_paths[0]
+    input_config["bucket"]["text"] = input_paths[1]
+    return input_config
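
For reference, `generate_input` returns the event dict that the handler below receives; a sketch of its shape with hypothetical bucket and prefix values (the real ones come from the SeBS deployment):

# All values below are hypothetical placeholders.
event = {
    "object": {
        "model": "bert-tiny-onnx.tar.gz",  # key under the model prefix
        "input": "sentences.jsonl",        # key under the text prefix
    },
    "bucket": {
        "bucket": "example-benchmarks-bucket",  # benchmarks_bucket
        "model": "path/to/model-prefix",        # input_paths[0]
        "text": "path/to/text-prefix",          # input_paths[1]
    },
}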
Lines changed: 157 additions & 0 deletions
@@ -0,0 +1,157 @@
+import datetime
+import json
+import os
+import tarfile
+import uuid
+from typing import Dict, List, Optional
+
+import numpy as np
+import onnxruntime as ort
+from tokenizers import Tokenizer
+
+from . import storage
+
+client = storage.storage.get_instance()
+
+MODEL_ARCHIVE = "bert-tiny-onnx.tar.gz"
+MODEL_DIRECTORY = "/tmp/bert_language_model"
+MODEL_SUBDIR = "bert-tiny-onnx"
+
+_session: Optional[ort.InferenceSession] = None
+_tokenizer: Optional[Tokenizer] = None
+_labels: Optional[Dict[int, str]] = None
+
+
+def _ensure_model(bucket: str, model_prefix: str):
+    """
+    Lazily download and initialize the ONNX model and tokenizer.
+    """
+    global _session, _tokenizer, _labels
+
+    model_path = os.path.join(MODEL_DIRECTORY, MODEL_SUBDIR)
+    model_download_begin = datetime.datetime.now()
+    model_download_end = model_download_begin
+
+    if _session is None or _tokenizer is None or _labels is None:
+        if not os.path.exists(model_path):
+            os.makedirs(MODEL_DIRECTORY, exist_ok=True)
+            archive_path = os.path.join("/tmp", f"{uuid.uuid4()}-{MODEL_ARCHIVE}")
+            client.download(bucket, os.path.join(model_prefix, MODEL_ARCHIVE), archive_path)
+            model_download_end = datetime.datetime.now()
+
+            with tarfile.open(archive_path, "r:gz") as tar:
+                tar.extractall(MODEL_DIRECTORY)
+            os.remove(archive_path)
+        else:
+            model_download_begin = datetime.datetime.now()
+            model_download_end = model_download_begin
+
+        model_process_begin = datetime.datetime.now()
+        tokenizer_path = os.path.join(model_path, "tokenizer.json")
+        _tokenizer = Tokenizer.from_file(tokenizer_path)
+        _tokenizer.enable_truncation(max_length=128)
+        _tokenizer.enable_padding(length=128)
+
+        label_map_path = os.path.join(model_path, "label_map.json")
+        with open(label_map_path, "r") as f:
+            raw_labels = json.load(f)
+        _labels = {int(idx): label for idx, label in raw_labels.items()}
+
+        onnx_path = os.path.join(model_path, "model.onnx")
+
+        available = ort.get_available_providers()
+        if "CUDAExecutionProvider" not in available:
+            raise RuntimeError(f"CUDAExecutionProvider unavailable (have: {available})")
+
+        _session = ort.InferenceSession(onnx_path, providers=["CUDAExecutionProvider"])
+        model_process_end = datetime.datetime.now()
+    else:
+        model_process_begin = datetime.datetime.now()
+        model_process_end = model_process_begin
+
+    model_download_time = (model_download_end - model_download_begin) / datetime.timedelta(
+        microseconds=1
+    )
+    model_process_time = (model_process_end - model_process_begin) / datetime.timedelta(
+        microseconds=1
+    )
+
+    return model_download_time, model_process_time
+
+
+def _prepare_inputs(sentences: List[str]):
+    assert _tokenizer is not None
+
+    encodings = _tokenizer.encode_batch(sentences)
+
+    input_ids = np.array([enc.ids for enc in encodings], dtype=np.int64)
+    attention_mask = np.array([enc.attention_mask for enc in encodings], dtype=np.int64)
+    token_type_ids = np.array(
+        [enc.type_ids if enc.type_ids else [0] * len(enc.ids) for enc in encodings],
+        dtype=np.int64,
+    )
+
+    return {
+        "input_ids": input_ids,
+        "attention_mask": attention_mask,
+        "token_type_ids": token_type_ids,
+    }
+
+
+def _softmax(logits: np.ndarray) -> np.ndarray:
+    shifted = logits - np.max(logits, axis=1, keepdims=True)
+    exp = np.exp(shifted)
+    return exp / np.sum(exp, axis=1, keepdims=True)
+
+
+def handler(event):
+    bucket = event.get("bucket", {}).get("bucket")
+    model_prefix = event.get("bucket", {}).get("model")
+    text_prefix = event.get("bucket", {}).get("text")
+    text_key = event.get("object", {}).get("input")
+
+    download_begin = datetime.datetime.now()
+    text_download_path = os.path.join("/tmp", f"{uuid.uuid4()}-{os.path.basename(text_key)}")
+    client.download(bucket, os.path.join(text_prefix, text_key), text_download_path)
+    download_end = datetime.datetime.now()
+
+    model_download_time, model_process_time = _ensure_model(bucket, model_prefix)
+    assert _session is not None and _labels is not None and _tokenizer is not None
+
+    with open(text_download_path, "r") as f:
+        sentences = [json.loads(line)["text"] for line in f if line.strip()]
+
+    os.remove(text_download_path)
+
+    inference_begin = datetime.datetime.now()
+    inputs = _prepare_inputs(sentences)
+    outputs = _session.run(None, inputs)
+    logits = outputs[0]
+    probabilities = _softmax(logits)
+    inference_end = datetime.datetime.now()
+
+    results = []
+    for sentence, probs in zip(sentences, probabilities):
+        label_idx = int(np.argmax(probs))
+        label = _labels.get(label_idx, str(label_idx))
+        results.append(
+            {
+                "text": sentence,
+                "label": label,
+                "confidence": float(probs[label_idx]),
+                "raw_scores": probs.tolist(),
+            }
+        )
+
+    download_time = (download_end - download_begin) / datetime.timedelta(microseconds=1)
+    compute_time = (inference_end - inference_begin) / datetime.timedelta(microseconds=1)
+
+    return {
+        "result": {"predictions": results},
+        "measurement": {
+            "download_time": download_time + model_download_time,
+            "compute_time": compute_time + model_process_time,
+            "model_time": model_process_time,
+            "model_download_time": model_download_time,
+        },
+    }
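
A quick standalone check (not part of the commit) of the max-shifted `_softmax` used above: softmax is shift-invariant, so subtracting the per-row maximum leaves the result unchanged while preventing overflow in `exp`.

import numpy as np

def softmax(logits: np.ndarray) -> np.ndarray:
    # softmax(x) == softmax(x - c) for any constant c, so subtracting
    # the row max avoids overflow without changing the probabilities.
    shifted = logits - np.max(logits, axis=1, keepdims=True)
    exp = np.exp(shifted)
    return exp / np.sum(exp, axis=1, keepdims=True)

logits = np.array([[2.0, 1.0, 0.1], [1000.0, 999.0, 998.0]])
print(softmax(logits))
# First row: ~[0.659, 0.242, 0.099]; second row stays finite even though
# np.exp(1000.0) on its own would overflow to inf.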
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+# No additional initialization required for the BERT inference benchmark.
Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
+# Stripping package code is based on https://github.com/ryfeus/lambda-packs repo
+
+PACKAGE_DIR=$1
+echo "Original size $(du -sh $1 | cut -f1)"
+
+CUR_DIR=$(pwd)
+cd $1
+# cleaning libs
+rm -rf external
+find . -type d -name "tests" -exec rm -rf {} +
+find . -type d -name "test" -exec rm -rf {} +
+find . -type d -name "bin" -not -path "*/torch/*" -exec rm -rf {} +
+
+# cleaning
+# stripping some of the numpy libs - libgfortran-2e0d59d6.so.5.0.0 - causes issues on Azure
+find -name "*.so" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip
+find -name "*.so.*" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip
+
+rm -r pip >/dev/null
+rm -r pip-* >/dev/null
+rm -r wheel >/dev/null
+rm -r wheel-* >/dev/null
+rm easy_install.py >/dev/null
+find . -name \*.pyc -delete
+cd ${CUR_DIR}
+echo "Stripped size $(du -sh $1 | cut -f1)"
+
+TORCH_DIR=".python_packages/lib/site-packages/torch"
+if [ -d "$1/${TORCH_DIR}" ]; then
+    cd $1
+    zip -qr torch.zip ${TORCH_DIR}
+    rm -rf ${TORCH_DIR}
+    cd ${CUR_DIR}
+    echo "Torch-zipped size $(du -sh $1 | cut -f1)"
+fi
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+numpy==1.24.4
+onnxruntime-gpu==1.16.3
+tokenizers==0.13.3
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+numpy==1.24.4
+onnxruntime-gpu==1.16.3
+tokenizers==0.13.3
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+numpy==1.24.4
+onnxruntime-gpu==1.16.3
+tokenizers==0.13.3
