Merged
2 changes: 1 addition & 1 deletion .gitmodules
@@ -3,4 +3,4 @@
url = https://github.com/mcopik/pypapi.git
[submodule "benchmarks-data"]
path = benchmarks-data
-url = https://github.com/spcl/serverless-benchmarks-data.git
+url = https://github.com/McLavish/serverless-benchmarks-data-dphpc.git
Copilot AI Nov 5, 2025

The submodule URL change to a personal fork (McLavish/serverless-benchmarks-data-dphpc) may cause synchronization issues with the main project. If this fork contains essential data for the new benchmark, consider contributing it back to the upstream repository or documenting the dependency clearly in the project README.

Owner Author

asked

3 changes: 3 additions & 0 deletions .mypy.ini
@@ -3,6 +3,9 @@
[mypy-docker]
ignore_missing_imports = True

[mypy-docker.*]
ignore_missing_imports = True

[mypy-tzlocal]
ignore_missing_imports = True

6 changes: 6 additions & 0 deletions benchmarks/400.inference/412.language-bert/config.json
@@ -0,0 +1,6 @@
{
  "timeout": 60,
  "memory": 512,
  "languages": ["python"],
  "modules": ["storage"]
}
33 changes: 33 additions & 0 deletions benchmarks/400.inference/412.language-bert/input.py
@@ -0,0 +1,33 @@
import os


def buckets_count():
    # model bucket and text bucket
    return (2, 0)


def upload_files(data_root, data_dir, upload_func):
    for root, _, files in os.walk(data_dir):
        prefix = os.path.relpath(root, data_root)
        for file in files:
            filepath = os.path.join(root, file)
            relative_key = os.path.join(prefix, file)
            upload_func(0, relative_key, filepath)


def generate_input(
    data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func
):
    model_archive = "bert-tiny-onnx.tar.gz"
    upload_func(0, model_archive, os.path.join(data_dir, "model", model_archive))

    text_filename = "sentences.jsonl"
    upload_func(1, text_filename, os.path.join(data_dir, "text", text_filename))

    input_config = {"object": {}, "bucket": {}}
    input_config["object"]["model"] = model_archive
    input_config["object"]["input"] = text_filename
    input_config["bucket"]["bucket"] = benchmarks_bucket
    input_config["bucket"]["model"] = input_paths[0]
    input_config["bucket"]["text"] = input_paths[1]
    return input_config
157 changes: 157 additions & 0 deletions benchmarks/400.inference/412.language-bert/python/function.py
@@ -0,0 +1,157 @@
import datetime
import json
import os
import tarfile
import uuid
from typing import Dict, List, Optional

import numpy as np
import onnxruntime as ort
from tokenizers import Tokenizer

from . import storage

client = storage.storage.get_instance()

MODEL_ARCHIVE = "bert-tiny-onnx.tar.gz"
MODEL_DIRECTORY = "/tmp/bert_language_model"
MODEL_SUBDIR = "bert-tiny-onnx"

_session: Optional[ort.InferenceSession] = None
_tokenizer: Optional[Tokenizer] = None
_labels: Optional[Dict[int, str]] = None


def _ensure_model(bucket: str, model_prefix: str):
    """
    Lazily download and initialize the ONNX model and tokenizer.
    """
    global _session, _tokenizer, _labels

    model_path = os.path.join(MODEL_DIRECTORY, MODEL_SUBDIR)
    model_download_begin = datetime.datetime.now()
    model_download_end = model_download_begin

    if _session is None or _tokenizer is None or _labels is None:
        if not os.path.exists(model_path):
            os.makedirs(MODEL_DIRECTORY, exist_ok=True)
            archive_path = os.path.join("/tmp", f"{uuid.uuid4()}-{MODEL_ARCHIVE}")
            client.download(bucket, os.path.join(model_prefix, MODEL_ARCHIVE), archive_path)
            model_download_end = datetime.datetime.now()

            with tarfile.open(archive_path, "r:gz") as tar:
                tar.extractall(MODEL_DIRECTORY)
Comment on lines +42 to +43
Copilot AI Nov 5, 2025

Using tar.extractall() without validation is vulnerable to path traversal attacks. Malicious archives could extract files outside the intended directory. Use tar.extractall(MODEL_DIRECTORY, filter='data') (Python 3.12+) or manually validate each member's path before extraction for older Python versions.
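
For Python versions without the extraction filter, a minimal sketch of the manual validation described above could look like this (the _safe_extract helper is illustrative and not part of the benchmark):

import os
import tarfile


def _safe_extract(archive_path: str, destination: str) -> None:
    # Reject any member whose resolved path would land outside the destination
    # directory, then extract as usual.
    dest_root = os.path.realpath(destination)
    with tarfile.open(archive_path, "r:gz") as tar:
        for member in tar.getmembers():
            target = os.path.realpath(os.path.join(destination, member.name))
            if os.path.commonpath([dest_root, target]) != dest_root:
                raise RuntimeError(f"Blocked path traversal attempt: {member.name}")
        tar.extractall(destination)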

Owner Author

I don't care

            os.remove(archive_path)
        else:
            model_download_begin = datetime.datetime.now()
            model_download_end = model_download_begin

        model_process_begin = datetime.datetime.now()
        tokenizer_path = os.path.join(model_path, "tokenizer.json")
        _tokenizer = Tokenizer.from_file(tokenizer_path)
        _tokenizer.enable_truncation(max_length=128)
        _tokenizer.enable_padding(length=128)

        label_map_path = os.path.join(model_path, "label_map.json")
        with open(label_map_path, "r") as f:
            raw_labels = json.load(f)
        _labels = {int(idx): label for idx, label in raw_labels.items()}

        onnx_path = os.path.join(model_path, "model.onnx")

        available = ort.get_available_providers()
        if "CUDAExecutionProvider" not in available:
            raise RuntimeError(f"CUDAExecutionProvider unavailable (have: {available})")

        _session = ort.InferenceSession(onnx_path, providers=["CUDAExecutionProvider"])
Comment on lines +63 to +66
Copilot AI Nov 5, 2025

The code requires CUDAExecutionProvider but the benchmark uses onnxruntime-gpu. This creates a hard dependency on GPU availability, which may fail in CPU-only environments. Consider falling back to CPUExecutionProvider if CUDA is unavailable, or document this GPU requirement clearly in the benchmark configuration. The existing 411.image-recognition benchmark uses CPU-only inference for broader compatibility.

Suggested change
if "CUDAExecutionProvider" not in available:
raise RuntimeError(f"CUDAExecutionProvider unavailable (have: {available})")
_session = ort.InferenceSession(onnx_path, providers=["CUDAExecutionProvider"])
if "CUDAExecutionProvider" in available:
providers = ["CUDAExecutionProvider"]
print("Using CUDAExecutionProvider for ONNX Runtime inference.")
else:
providers = ["CPUExecutionProvider"]
print("CUDAExecutionProvider unavailable, falling back to CPUExecutionProvider for ONNX Runtime inference.")
_session = ort.InferenceSession(onnx_path, providers=providers)

Owner Author

I don't care

        model_process_end = datetime.datetime.now()
    else:
        model_process_begin = datetime.datetime.now()
        model_process_end = model_process_begin

    model_download_time = (model_download_end - model_download_begin) / datetime.timedelta(
        microseconds=1
    )
    model_process_time = (model_process_end - model_process_begin) / datetime.timedelta(
        microseconds=1
    )

    return model_download_time, model_process_time


def _prepare_inputs(sentences: List[str]):
    assert _tokenizer is not None

    encodings = _tokenizer.encode_batch(sentences)

    input_ids = np.array([enc.ids for enc in encodings], dtype=np.int64)
    attention_mask = np.array([enc.attention_mask for enc in encodings], dtype=np.int64)
    token_type_ids = np.array(
        [enc.type_ids if enc.type_ids else [0] * len(enc.ids) for enc in encodings],
        dtype=np.int64,
    )

    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "token_type_ids": token_type_ids,
    }


def _softmax(logits: np.ndarray) -> np.ndarray:
    shifted = logits - np.max(logits, axis=1, keepdims=True)
    exp = np.exp(shifted)
    return exp / np.sum(exp, axis=1, keepdims=True)


def handler(event):
    bucket = event.get("bucket", {}).get("bucket")
    model_prefix = event.get("bucket", {}).get("model")
    text_prefix = event.get("bucket", {}).get("text")
    text_key = event.get("object", {}).get("input")

    download_begin = datetime.datetime.now()
    text_download_path = os.path.join("/tmp", f"{uuid.uuid4()}-{os.path.basename(text_key)}")
    client.download(bucket, os.path.join(text_prefix, text_key), text_download_path)
    download_end = datetime.datetime.now()

    model_download_time, model_process_time = _ensure_model(bucket, model_prefix)
    assert _session is not None and _labels is not None and _tokenizer is not None

    with open(text_download_path, "r") as f:
        sentences = [json.loads(line)["text"] for line in f if line.strip()]

    os.remove(text_download_path)

    inference_begin = datetime.datetime.now()
    inputs = _prepare_inputs(sentences)
    outputs = _session.run(None, inputs)
    logits = outputs[0]
    probabilities = _softmax(logits)
    inference_end = datetime.datetime.now()

    results = []
    for sentence, probs in zip(sentences, probabilities):
        label_idx = int(np.argmax(probs))
        label = _labels.get(label_idx, str(label_idx))
        results.append(
            {
                "text": sentence,
                "label": label,
                "confidence": float(probs[label_idx]),
                "raw_scores": probs.tolist(),
            }
        )

    download_time = (download_end - download_begin) / datetime.timedelta(microseconds=1)
    compute_time = (inference_end - inference_begin) / datetime.timedelta(microseconds=1)

    return {
        "result": {"predictions": results},
        "measurement": {
            "download_time": download_time + model_download_time,
            "compute_time": compute_time + model_process_time,
            "model_time": model_process_time,
            "model_download_time": model_download_time,
        },
    }
3 changes: 3 additions & 0 deletions benchmarks/400.inference/412.language-bert/python/init.sh
@@ -0,0 +1,3 @@
#!/bin/bash

# No additional initialization required for the BERT inference benchmark.
35 changes: 35 additions & 0 deletions benchmarks/400.inference/412.language-bert/python/package.sh
@@ -0,0 +1,35 @@
# Stripping package code is based on https://github.com/ryfeus/lambda-packs repo

PACKAGE_DIR=$1
echo "Original size $(du -sh $1 | cut -f1)"

CUR_DIR=$(pwd)
cd $1
# cleaning libs
rm -rf external
find . -type d -name "tests" -exec rm -rf {} +
find . -type d -name "test" -exec rm -rf {} +
find . -type d -name "bin" -not -path "*/torch/*" -exec rm -rf {} +

# cleaning
# stripping some of the numpy libs - libgfortran-2e0d59d6.so.5.0.0 - causes issues on Azure
find -name "*.so" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip
find -name "*.so.*" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip

rm -r pip >/dev/null
rm -r pip-* >/dev/null
rm -r wheel >/dev/null
rm -r wheel-* >/dev/null
rm easy_install.py >/dev/null
find . -name \*.pyc -delete
cd ${CUR_DIR}
echo "Stripped size $(du -sh $1 | cut -f1)"

TORCH_DIR=".python_packages/lib/site-packages/torch"
if [ -d "$1/${TORCH_DIR}" ]; then
cd $1
zip -qr torch.zip ${TORCH_DIR}
rm -rf ${TORCH_DIR}
cd ${CUR_DIR}
echo "Torch-zipped size $(du -sh $1 | cut -f1)"
fi
Comment on lines +28 to +35
Copilot AI Nov 5, 2025

This benchmark packages torch despite not using PyTorch in its requirements or implementation (uses ONNX Runtime instead). The torch packaging logic appears to be copied from the 411.image-recognition benchmark but is unnecessary here. Consider removing lines 28-35 to avoid confusion and reduce package size.

Suggested change
-TORCH_DIR=".python_packages/lib/site-packages/torch"
-if [ -d "$1/${TORCH_DIR}" ]; then
-cd $1
-zip -qr torch.zip ${TORCH_DIR}
-rm -rf ${TORCH_DIR}
-cd ${CUR_DIR}
-echo "Torch-zipped size $(du -sh $1 | cut -f1)"
-fi

Owner Author

NO

@@ -0,0 +1,3 @@
numpy==1.24.4
onnxruntime-gpu==1.16.3
Copilot AI Nov 5, 2025

Using onnxruntime-gpu requires CUDA dependencies and may not be compatible with all serverless environments. Consider using onnxruntime (CPU version) for better portability across different cloud platforms, or provide separate CPU and GPU requirement files. The check on lines 63-64 of function.py enforces the GPU requirement, but many serverless platforms don't provide GPU access by default.

Suggested change
-onnxruntime-gpu==1.16.3
+onnxruntime==1.16.3

Owner Author

I don't care

tokenizers==0.13.3
@@ -0,0 +1,3 @@
numpy==1.24.4
onnxruntime-gpu==1.16.3
tokenizers==0.13.3
@@ -0,0 +1,3 @@
numpy==1.24.4
onnxruntime-gpu==1.16.3
tokenizers==0.13.3
@@ -0,0 +1,3 @@
numpy==1.24.4
onnxruntime-gpu==1.16.3
tokenizers==0.13.3
@@ -0,0 +1,3 @@
numpy==1.24.4
onnxruntime-gpu==1.16.3
tokenizers==0.13.3
6 changes: 5 additions & 1 deletion docs/benchmarks.md
@@ -10,6 +10,7 @@
| Multimedia | 220.video-processing | Python | x64, arm64 | Add a watermark and generate gif of a video file. |
| Utilities | 311.compression | Python | x64, arm64 | Create a .zip file for a group of files in storage and return to user to download. |
| Inference | 411.image-recognition | Python | x64 | Image recognition with ResNet and pytorch. |
| Inference | 412.language-bert | Python | x64 | Sentence classification with a compact BERT model served via ONNX Runtime. |
| Scientific | 501.graph-pagerank | Python | x64, arm64 | PageRank implementation with igraph. |
| Scientific | 502.graph-mst | Python | x64, arm64 | Minimum spanning tree (MST) implementation with igraph. |
| Scientific | 503.graph-bfs | Python | x64, arm64 | Breadth-first search (BFS) implementation with igraph. |
@@ -70,6 +71,10 @@ It implements the .zip file creation with the help of the `shutil` standard libr

The benchmark is inspired by MLPerf and implements image recognition with Resnet50. It downloads the input and model from the storage and uses the CPU-only `pytorch` library in Python.

### Language Inference

This benchmark runs sequence classification with a compact BERT model exported to ONNX. The function downloads the model archive and text samples from storage, tokenizes the sentences, executes the ONNX Runtime session, and returns the predicted labels together with confidences.
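
As a rough sketch of how the function is driven (bucket and prefix names below are placeholders, not values taken from the benchmark), the handler expects an event shaped like the config produced by input.py:

event = {
    "bucket": {
        "bucket": "sebs-benchmarks",          # storage bucket holding both inputs
        "model": "412.language-bert/model",   # prefix containing bert-tiny-onnx.tar.gz
        "text": "412.language-bert/text",     # prefix containing sentences.jsonl
    },
    "object": {
        "model": "bert-tiny-onnx.tar.gz",
        "input": "sentences.jsonl",
    },
}
result = handler(event)
print(result["result"]["predictions"][0]["label"])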

## Scientific

### Graph PageRank, BFS, MST
@@ -87,4 +92,3 @@ This benchmark is inspired by the [DNAVisualization](https://github.com/Benjamin
## Applications

**(WiP)** Coming soon!

3 changes: 1 addition & 2 deletions install.py
@@ -86,7 +86,7 @@ def execute(cmd, cwd=None):
execute(f"git pull", cwd=data_dir)
# clone
else:
execute(f"git clone https://github.com/spcl/serverless-benchmarks-data.git {data_dir}")
execute(f"git clone https://github.com/McLavish/serverless-benchmarks-data-dphpc.git {data_dir}")
Copilot AI Nov 5, 2025

The repository URL change from spcl/serverless-benchmarks-data to McLavish/serverless-benchmarks-data-dphpc suggests this is a fork for a specific project (dphpc). Using a personal fork in production code may cause maintenance issues if the fork becomes outdated or unavailable. Consider using the official repository or clearly documenting why this fork is necessary.

Suggested change
execute(f"git clone https://github.com/McLavish/serverless-benchmarks-data-dphpc.git {data_dir}")
execute(f"git clone https://github.com/spcl/serverless-benchmarks-data.git {data_dir}")

Owner Author

who

else:
raise error

@@ -99,4 +99,3 @@ def execute(cmd, cwd=None):
execute("python3 setup.py build")
execute("python3 pypapi/papi_build.py")
os.chdir(cur_dir)

1 change: 1 addition & 0 deletions sebs/regression.py
@@ -21,6 +21,7 @@
"220.video-processing",
"311.compression",
"411.image-recognition",
"412.language-bert",
"501.graph-pagerank",
"502.graph-mst",
"503.graph-bfs",