diff --git a/.gitmodules b/.gitmodules
index 4feae9bfb..c33a17880 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -3,4 +3,4 @@
 	url = https://github.com/mcopik/pypapi.git
 [submodule "benchmarks-data"]
 	path = benchmarks-data
-	url = https://github.com/spcl/serverless-benchmarks-data.git
+	url = https://github.com/McLavish/serverless-benchmarks-data-dphpc.git
diff --git a/.mypy.ini b/.mypy.ini
index e202650ed..636105bfa 100644
--- a/.mypy.ini
+++ b/.mypy.ini
@@ -3,6 +3,9 @@
 [mypy-docker]
 ignore_missing_imports = True
 
+[mypy-docker.*]
+ignore_missing_imports = True
+
 [mypy-tzlocal]
 ignore_missing_imports = True
 
diff --git a/benchmarks/400.inference/412.language-bert/config.json b/benchmarks/400.inference/412.language-bert/config.json
new file mode 100644
index 000000000..94ede7925
--- /dev/null
+++ b/benchmarks/400.inference/412.language-bert/config.json
@@ -0,0 +1,6 @@
+{
+  "timeout": 60,
+  "memory": 512,
+  "languages": ["python"],
+  "modules": ["storage"]
+}
diff --git a/benchmarks/400.inference/412.language-bert/input.py b/benchmarks/400.inference/412.language-bert/input.py
new file mode 100644
index 000000000..9af7ecb56
--- /dev/null
+++ b/benchmarks/400.inference/412.language-bert/input.py
@@ -0,0 +1,33 @@
+import os
+
+
+def buckets_count():
+    # two input buckets (0: model, 1: text), no output buckets
+    return (2, 0)
+
+
+def upload_files(data_root, data_dir, upload_func):
+    for root, _, files in os.walk(data_dir):
+        prefix = os.path.relpath(root, data_root)
+        for file in files:
+            filepath = os.path.join(root, file)
+            relative_key = os.path.join(prefix, file)
+            upload_func(0, relative_key, filepath)
+
+
+def generate_input(
+    data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func
+):
+    model_archive = "bert-tiny-onnx.tar.gz"
+    upload_func(0, model_archive, os.path.join(data_dir, "model", model_archive))
+
+    text_filename = "sentences.jsonl"
+    upload_func(1, text_filename, os.path.join(data_dir, "text", text_filename))
+
+    input_config = {"object": {}, "bucket": {}}
+    input_config["object"]["model"] = model_archive
+    input_config["object"]["input"] = text_filename
+    input_config["bucket"]["bucket"] = benchmarks_bucket
+    input_config["bucket"]["model"] = input_paths[0]
+    input_config["bucket"]["text"] = input_paths[1]
+    return input_config
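Note: `generate_input` assumes the benchmark data directory provides `model/bert-tiny-onnx.tar.gz` and `text/sentences.jsonl`. A minimal sketch of a compatible `sentences.jsonl` generator — the sample sentences are placeholders, and the handler below reads only the `text` field of each line:

```python
import json

# Hypothetical sample input: one JSON object per line, each with a "text" field.
sentences = [
    "This benchmark suite is easy to extend.",
    "The inference latency was higher than expected.",
]

with open("sentences.jsonl", "w") as f:
    for text in sentences:
        f.write(json.dumps({"text": text}) + "\n")
```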
diff --git a/benchmarks/400.inference/412.language-bert/python/function.py b/benchmarks/400.inference/412.language-bert/python/function.py
new file mode 100644
index 000000000..7e4f981ef
--- /dev/null
+++ b/benchmarks/400.inference/412.language-bert/python/function.py
@@ -0,0 +1,157 @@
+import datetime
+import json
+import os
+import tarfile
+import uuid
+from typing import Dict, List, Optional
+
+import numpy as np
+import onnxruntime as ort
+from tokenizers import Tokenizer
+
+from . import storage
+
+client = storage.storage.get_instance()
+
+MODEL_ARCHIVE = "bert-tiny-onnx.tar.gz"
+MODEL_DIRECTORY = "/tmp/bert_language_model"
+MODEL_SUBDIR = "bert-tiny-onnx"
+
+_session: Optional[ort.InferenceSession] = None
+_tokenizer: Optional[Tokenizer] = None
+_labels: Optional[Dict[int, str]] = None
+
+
+def _ensure_model(bucket: str, model_prefix: str):
+    """
+    Lazily download and initialize the ONNX model and tokenizer.
+    """
+    global _session, _tokenizer, _labels
+
+    model_path = os.path.join(MODEL_DIRECTORY, MODEL_SUBDIR)
+    model_download_begin = datetime.datetime.now()
+    model_download_end = model_download_begin
+
+    if _session is None or _tokenizer is None or _labels is None:
+        if not os.path.exists(model_path):
+            os.makedirs(MODEL_DIRECTORY, exist_ok=True)
+            archive_path = os.path.join("/tmp", f"{uuid.uuid4()}-{MODEL_ARCHIVE}")
+            client.download(bucket, os.path.join(model_prefix, MODEL_ARCHIVE), archive_path)
+            model_download_end = datetime.datetime.now()
+
+            with tarfile.open(archive_path, "r:gz") as tar:
+                tar.extractall(MODEL_DIRECTORY)
+            os.remove(archive_path)
+        else:
+            model_download_begin = datetime.datetime.now()
+            model_download_end = model_download_begin
+
+        model_process_begin = datetime.datetime.now()
+        tokenizer_path = os.path.join(model_path, "tokenizer.json")
+        _tokenizer = Tokenizer.from_file(tokenizer_path)
+        _tokenizer.enable_truncation(max_length=128)
+        _tokenizer.enable_padding(length=128)
+
+        label_map_path = os.path.join(model_path, "label_map.json")
+        with open(label_map_path, "r") as f:
+            raw_labels = json.load(f)
+        _labels = {int(idx): label for idx, label in raw_labels.items()}
+
+        onnx_path = os.path.join(model_path, "model.onnx")
+
+        available = ort.get_available_providers()
+        if "CUDAExecutionProvider" not in available:
+            raise RuntimeError(f"CUDAExecutionProvider unavailable (have: {available})")
+
+        _session = ort.InferenceSession(onnx_path, providers=["CUDAExecutionProvider"])
+        model_process_end = datetime.datetime.now()
+    else:
+        model_process_begin = datetime.datetime.now()
+        model_process_end = model_process_begin
+
+    model_download_time = (model_download_end - model_download_begin) / datetime.timedelta(
+        microseconds=1
+    )
+    model_process_time = (model_process_end - model_process_begin) / datetime.timedelta(
+        microseconds=1
+    )
+
+    return model_download_time, model_process_time
+
+
+def _prepare_inputs(sentences: List[str]):
+    assert _tokenizer is not None
+
+    encodings = _tokenizer.encode_batch(sentences)
+
+    input_ids = np.array([enc.ids for enc in encodings], dtype=np.int64)
+    attention_mask = np.array([enc.attention_mask for enc in encodings], dtype=np.int64)
+    token_type_ids = np.array(
+        [enc.type_ids if enc.type_ids else [0] * len(enc.ids) for enc in encodings],
+        dtype=np.int64,
+    )
+
+    return {
+        "input_ids": input_ids,
+        "attention_mask": attention_mask,
+        "token_type_ids": token_type_ids,
+    }
+
+
+def _softmax(logits: np.ndarray) -> np.ndarray:
+    shifted = logits - np.max(logits, axis=1, keepdims=True)
+    exp = np.exp(shifted)
+    return exp / np.sum(exp, axis=1, keepdims=True)
+
+
+def handler(event):
+    bucket = event.get("bucket", {}).get("bucket")
+    model_prefix = event.get("bucket", {}).get("model")
+    text_prefix = event.get("bucket", {}).get("text")
+    text_key = event.get("object", {}).get("input")
+
+    download_begin = datetime.datetime.now()
+    text_download_path = os.path.join("/tmp", f"{uuid.uuid4()}-{os.path.basename(text_key)}")
+    client.download(bucket, os.path.join(text_prefix, text_key), text_download_path)
+    download_end = datetime.datetime.now()
+
+    model_download_time, model_process_time = _ensure_model(bucket, model_prefix)
+    assert _session is not None and _labels is not None and _tokenizer is not None
+
+    with open(text_download_path, "r") as f:
+        sentences = [json.loads(line)["text"] for line in f if line.strip()]
+
+    os.remove(text_download_path)
+
+    inference_begin = datetime.datetime.now()
+    inputs = _prepare_inputs(sentences)
+    outputs = _session.run(None, inputs)
+    logits = outputs[0]
+    probabilities = _softmax(logits)
+    inference_end = datetime.datetime.now()
+
+    results = []
+    for sentence, probs in zip(sentences, probabilities):
+        label_idx = int(np.argmax(probs))
+        label = _labels.get(label_idx, str(label_idx))
+        results.append(
+            {
+                "text": sentence,
+                "label": label,
+                "confidence": float(probs[label_idx]),
+                "raw_scores": probs.tolist(),
+            }
+        )
+
+    download_time = (download_end - download_begin) / datetime.timedelta(microseconds=1)
+    compute_time = (inference_end - inference_begin) / datetime.timedelta(microseconds=1)
+
+    return {
+        "result": {"predictions": results},
+        "measurement": {
+            "download_time": download_time + model_download_time,
+            "compute_time": compute_time + model_process_time,
+            "model_time": model_process_time,
+            "model_download_time": model_download_time,
+        },
+    }
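For reference, `handler` consumes the event layout that `generate_input` produces. A sketch of the payload with placeholder bucket and prefix values — the real values are filled in by SeBS at deployment time:

```python
# All names below are placeholders, not values mandated by the benchmark.
event = {
    "object": {
        "model": "bert-tiny-onnx.tar.gz",  # object.model from generate_input
        "input": "sentences.jsonl",        # object.input: key of the text file
    },
    "bucket": {
        "bucket": "sebs-benchmarks-data",    # benchmarks_bucket
        "model": "412.language-bert/model",  # input_paths[0]
        "text": "412.language-bert/text",    # input_paths[1]
    },
}
```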
diff --git a/benchmarks/400.inference/412.language-bert/python/init.sh b/benchmarks/400.inference/412.language-bert/python/init.sh
new file mode 100755
index 000000000..160852abe
--- /dev/null
+++ b/benchmarks/400.inference/412.language-bert/python/init.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+# No additional initialization required for the BERT inference benchmark.
diff --git a/benchmarks/400.inference/412.language-bert/python/package.sh b/benchmarks/400.inference/412.language-bert/python/package.sh
new file mode 100644
index 000000000..edb27ebe0
--- /dev/null
+++ b/benchmarks/400.inference/412.language-bert/python/package.sh
@@ -0,0 +1,35 @@
+# Stripping package code is based on https://github.com/ryfeus/lambda-packs repo
+
+PACKAGE_DIR=$1
+echo "Original size $(du -sh $1 | cut -f1)"
+
+CUR_DIR=$(pwd)
+cd $1
+# cleaning libs
+rm -rf external
+find . -type d -name "tests" -exec rm -rf {} +
+find . -type d -name "test" -exec rm -rf {} +
+find . -type d -name "bin" -not -path "*/torch/*" -exec rm -rf {} +
+
+# cleaning
+# stripping some of the numpy libs - libgfortran-2e0d59d6.so.5.0.0 - causes issues on Azure
+find -name "*.so" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip
+find -name "*.so.*" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip
+
+rm -r pip >/dev/null
+rm -r pip-* >/dev/null
+rm -r wheel >/dev/null
+rm -r wheel-* >/dev/null
+rm easy_install.py >/dev/null
+find . -name \*.pyc -delete
+cd ${CUR_DIR}
+echo "Stripped size $(du -sh $1 | cut -f1)"
+
+TORCH_DIR=".python_packages/lib/site-packages/torch"
+if [ -d "$1/${TORCH_DIR}" ]; then
+    cd $1
+    zip -qr torch.zip ${TORCH_DIR}
+    rm -rf ${TORCH_DIR}
+    cd ${CUR_DIR}
+    echo "Torch-zipped size $(du -sh $1 | cut -f1)"
+fi
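The function expects `bert-tiny-onnx.tar.gz` to unpack into a `bert-tiny-onnx/` directory holding `model.onnx`, `tokenizer.json`, and `label_map.json` (see `MODEL_SUBDIR` in `function.py`). A sketch of assembling such an archive from an already-exported model — the `export/` directory and the label names are assumptions:

```python
import json
import tarfile

# Hypothetical label map; the handler only requires string keys convertible to int.
with open("export/label_map.json", "w") as f:
    json.dump({"0": "negative", "1": "positive"}, f)

# Archive members must live under "bert-tiny-onnx/" to match MODEL_SUBDIR.
with tarfile.open("bert-tiny-onnx.tar.gz", "w:gz") as tar:
    for name in ("model.onnx", "tokenizer.json", "label_map.json"):
        tar.add(f"export/{name}", arcname=f"bert-tiny-onnx/{name}")
```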
diff --git a/benchmarks/400.inference/412.language-bert/python/requirements.txt b/benchmarks/400.inference/412.language-bert/python/requirements.txt
new file mode 100644
index 000000000..67a8c1e18
--- /dev/null
+++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt
@@ -0,0 +1,3 @@
+numpy==1.24.4
+onnxruntime-gpu==1.16.3
+tokenizers==0.13.3
diff --git a/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.10 b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.10
new file mode 100644
index 000000000..67a8c1e18
--- /dev/null
+++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.10
@@ -0,0 +1,3 @@
+numpy==1.24.4
+onnxruntime-gpu==1.16.3
+tokenizers==0.13.3
diff --git a/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.11 b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.11
new file mode 100644
index 000000000..67a8c1e18
--- /dev/null
+++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.11
@@ -0,0 +1,3 @@
+numpy==1.24.4
+onnxruntime-gpu==1.16.3
+tokenizers==0.13.3
diff --git a/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.8 b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.8
new file mode 100644
index 000000000..67a8c1e18
--- /dev/null
+++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.8
@@ -0,0 +1,3 @@
+numpy==1.24.4
+onnxruntime-gpu==1.16.3
+tokenizers==0.13.3
diff --git a/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.9 b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.9
new file mode 100644
index 000000000..67a8c1e18
--- /dev/null
+++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.9
@@ -0,0 +1,3 @@
+numpy==1.24.4
+onnxruntime-gpu==1.16.3
+tokenizers==0.13.3
diff --git a/docs/benchmarks.md b/docs/benchmarks.md
index e292a4b04..73056c86c 100644
--- a/docs/benchmarks.md
+++ b/docs/benchmarks.md
@@ -10,6 +10,7 @@
 | Multimedia | 220.video-processing | Python | x64, arm64 | Add a watermark and generate gif of a video file. |
 | Utilities | 311.compression | Python | x64, arm64 | Create a .zip file for a group of files in storage and return to user to download. |
 | Inference | 411.image-recognition | Python | x64 | Image recognition with ResNet and pytorch. |
+| Inference | 412.language-bert | Python | x64 | Sentence classification with a compact BERT model served via ONNX Runtime. |
 | Scientific | 501.graph-pagerank | Python | x64, arm64 | PageRank implementation with igraph. |
 | Scientific | 502.graph-mst | Python | x64, arm64 | Minimum spanning tree (MST) implementation with igraph. |
 | Scientific | 503.graph-bfs | Python | x64, arm64 | Breadth-first search (BFS) implementation with igraph. |
@@ -70,6 +71,10 @@ It implements the .zip file creation with the help of the `shutil` standard library module.
 The benchmark is inspired by MLPerf and implements image recognition with Resnet50.
 It downloads the input and model from the storage and uses the CPU-only `pytorch` library in Python.
 
+### Language Inference
+
+This benchmark runs sequence classification with a compact BERT model exported to ONNX. The function downloads the model archive and text samples from storage, tokenizes the sentences, executes the ONNX Runtime session, and returns the predicted labels together with confidences.
+
 ## Scientific
 
 ### Graph PageRank, BFS, MST
@@ -87,4 +92,3 @@ This benchmark is inspired by the [DNAVisualization](https://github.com/Benjamin
 ## Applications
 
 **(WiP)** Coming soon!
-
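As a sanity check for the flow documented above, a minimal local sketch of the tokenize-then-infer path — it assumes an unpacked `bert-tiny-onnx/` directory next to the script and swaps in the CPU provider for convenience, whereas the deployed function insists on CUDA:

```python
import numpy as np
import onnxruntime as ort
from tokenizers import Tokenizer

# Same tokenizer settings as function.py: truncate and pad to 128 tokens.
tokenizer = Tokenizer.from_file("bert-tiny-onnx/tokenizer.json")
tokenizer.enable_truncation(max_length=128)
tokenizer.enable_padding(length=128)

enc = tokenizer.encode("An example sentence.")
session = ort.InferenceSession(
    "bert-tiny-onnx/model.onnx", providers=["CPUExecutionProvider"]
)
logits = session.run(None, {
    "input_ids": np.array([enc.ids], dtype=np.int64),
    "attention_mask": np.array([enc.attention_mask], dtype=np.int64),
    "token_type_ids": np.array([enc.type_ids], dtype=np.int64),
})[0]
print("predicted label index:", int(np.argmax(logits, axis=1)[0]))
```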
diff --git a/install.py b/install.py
index 57f047d23..b856e45b7 100755
--- a/install.py
+++ b/install.py
@@ -86,7 +86,7 @@ def execute(cmd, cwd=None):
             execute(f"git pull", cwd=data_dir)
         # clone
         else:
-            execute(f"git clone https://github.com/spcl/serverless-benchmarks-data.git {data_dir}")
+            execute(f"git clone https://github.com/McLavish/serverless-benchmarks-data-dphpc.git {data_dir}")
     else:
         raise error
 
@@ -99,4 +99,3 @@
     execute("python3 setup.py build")
     execute("python3 pypapi/papi_build.py")
     os.chdir(cur_dir)
-
diff --git a/sebs/regression.py b/sebs/regression.py
index 579760a1c..e0eaf7f4f 100644
--- a/sebs/regression.py
+++ b/sebs/regression.py
@@ -21,6 +21,7 @@
     "220.video-processing",
     "311.compression",
     "411.image-recognition",
+    "412.language-bert",
     "501.graph-pagerank",
     "502.graph-mst",
     "503.graph-bfs",