Feature/bert inference #2
Changes from all commits: 550cc8c, 2b75311, 813af03, 3a96f04, aae1023, 25fd1d9
@@ -0,0 +1,6 @@
{
  "timeout": 60,
  "memory": 512,
  "languages": ["python"],
  "modules": ["storage"]
}
@@ -0,0 +1,33 @@
import os


def buckets_count():
    # model bucket and text bucket
    return (2, 0)


def upload_files(data_root, data_dir, upload_func):
    for root, _, files in os.walk(data_dir):
        prefix = os.path.relpath(root, data_root)
        for file in files:
            filepath = os.path.join(root, file)
            relative_key = os.path.join(prefix, file)
            upload_func(0, relative_key, filepath)


def generate_input(
    data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func
):
    model_archive = "bert-tiny-onnx.tar.gz"
    upload_func(0, model_archive, os.path.join(data_dir, "model", model_archive))

    text_filename = "sentences.jsonl"
    upload_func(1, text_filename, os.path.join(data_dir, "text", text_filename))

    input_config = {"object": {}, "bucket": {}}
    input_config["object"]["model"] = model_archive
    input_config["object"]["input"] = text_filename
    input_config["bucket"]["bucket"] = benchmarks_bucket
    input_config["bucket"]["model"] = input_paths[0]
    input_config["bucket"]["text"] = input_paths[1]
    return input_config
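For reference, this is the shape of the dictionary that generate_input returns; the placeholder values are illustrative, with input_paths[0] and input_paths[1] being the prefixes of the two input buckets declared by buckets_count():

# Illustrative only, not part of the diff: the config generate_input produces.
example_config = {
    "object": {
        "model": "bert-tiny-onnx.tar.gz",  # archive uploaded to input bucket 0
        "input": "sentences.jsonl",        # sentences uploaded to input bucket 1
    },
    "bucket": {
        "bucket": "<benchmarks-bucket>",   # the benchmarks_bucket argument
        "model": "<model-prefix>",         # input_paths[0]
        "text": "<text-prefix>",           # input_paths[1]
    },
}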
@@ -0,0 +1,157 @@
import datetime
import json
import os
import tarfile
import uuid
from typing import Dict, List, Optional

import numpy as np
import onnxruntime as ort
from tokenizers import Tokenizer

from . import storage

client = storage.storage.get_instance()

MODEL_ARCHIVE = "bert-tiny-onnx.tar.gz"
MODEL_DIRECTORY = "/tmp/bert_language_model"
MODEL_SUBDIR = "bert-tiny-onnx"

_session: Optional[ort.InferenceSession] = None
_tokenizer: Optional[Tokenizer] = None
_labels: Optional[Dict[int, str]] = None


def _ensure_model(bucket: str, model_prefix: str):
    """
    Lazily download and initialize the ONNX model and tokenizer.
    """
    global _session, _tokenizer, _labels

    model_path = os.path.join(MODEL_DIRECTORY, MODEL_SUBDIR)
    model_download_begin = datetime.datetime.now()
    model_download_end = model_download_begin

    if _session is None or _tokenizer is None or _labels is None:
        if not os.path.exists(model_path):
            os.makedirs(MODEL_DIRECTORY, exist_ok=True)
            archive_path = os.path.join("/tmp", f"{uuid.uuid4()}-{MODEL_ARCHIVE}")
            client.download(bucket, os.path.join(model_prefix, MODEL_ARCHIVE), archive_path)
            model_download_end = datetime.datetime.now()

            with tarfile.open(archive_path, "r:gz") as tar:
                tar.extractall(MODEL_DIRECTORY)

            os.remove(archive_path)
        else:
            model_download_begin = datetime.datetime.now()
            model_download_end = model_download_begin

        model_process_begin = datetime.datetime.now()
        tokenizer_path = os.path.join(model_path, "tokenizer.json")
        _tokenizer = Tokenizer.from_file(tokenizer_path)
        _tokenizer.enable_truncation(max_length=128)
        _tokenizer.enable_padding(length=128)

        label_map_path = os.path.join(model_path, "label_map.json")
        with open(label_map_path, "r") as f:
            raw_labels = json.load(f)
        _labels = {int(idx): label for idx, label in raw_labels.items()}

        onnx_path = os.path.join(model_path, "model.onnx")

        available = ort.get_available_providers()
        if "CUDAExecutionProvider" not in available:
            raise RuntimeError(f"CUDAExecutionProvider unavailable (have: {available})")

        _session = ort.InferenceSession(onnx_path, providers=["CUDAExecutionProvider"])
Comment on lines +63 to +66

Suggested change:
- if "CUDAExecutionProvider" not in available:
-     raise RuntimeError(f"CUDAExecutionProvider unavailable (have: {available})")
- _session = ort.InferenceSession(onnx_path, providers=["CUDAExecutionProvider"])
+ if "CUDAExecutionProvider" in available:
+     providers = ["CUDAExecutionProvider"]
+     print("Using CUDAExecutionProvider for ONNX Runtime inference.")
+ else:
+     providers = ["CPUExecutionProvider"]
+     print("CUDAExecutionProvider unavailable, falling back to CPUExecutionProvider for ONNX Runtime inference.")
+ _session = ort.InferenceSession(onnx_path, providers=providers)
Reply: I don't care
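The diff excerpt stops at the session setup, so the remainder of the 157-line handler is not shown here. As a hedged sketch (not the PR's actual code), the tokenizer, label map, and session initialized by _ensure_model would typically be used along these lines, assuming the exported model takes input_ids, attention_mask, and token_type_ids and produces a single logits output:

# Hedged sketch, not part of the diff: batch classification using the globals
# set up by _ensure_model(). Input/output names are assumptions about the model.
def _classify(sentences):
    encodings = _tokenizer.encode_batch(sentences)
    feeds = {
        "input_ids": np.array([e.ids for e in encodings], dtype=np.int64),
        "attention_mask": np.array([e.attention_mask for e in encodings], dtype=np.int64),
        "token_type_ids": np.array([e.type_ids for e in encodings], dtype=np.int64),
    }
    (logits,) = _session.run(None, feeds)  # assumed single output: [batch, num_labels]
    return [_labels[int(i)] for i in logits.argmax(axis=1)]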
@@ -0,0 +1,3 @@
#!/bin/bash

# No additional initialization required for the BERT inference benchmark.
@@ -0,0 +1,35 @@
# Stripping package code is based on https://github.com/ryfeus/lambda-packs repo

PACKAGE_DIR=$1
echo "Original size $(du -sh $1 | cut -f1)"

CUR_DIR=$(pwd)
cd $1
# cleaning libs
rm -rf external
find . -type d -name "tests" -exec rm -rf {} +
find . -type d -name "test" -exec rm -rf {} +
find . -type d -name "bin" -not -path "*/torch/*" -exec rm -rf {} +

# cleaning
# stripping some of the numpy libs - libgfortran-2e0d59d6.so.5.0.0 - causes issues on Azure
find -name "*.so" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip
find -name "*.so.*" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip

rm -r pip >/dev/null
rm -r pip-* >/dev/null
rm -r wheel >/dev/null
rm -r wheel-* >/dev/null
rm easy_install.py >/dev/null
find . -name \*.pyc -delete
cd ${CUR_DIR}
echo "Stripped size $(du -sh $1 | cut -f1)"

TORCH_DIR=".python_packages/lib/site-packages/torch"
if [ -d "$1/${TORCH_DIR}" ]; then
    cd $1
    zip -qr torch.zip ${TORCH_DIR}
    rm -rf ${TORCH_DIR}
    cd ${CUR_DIR}
    echo "Torch-zipped size $(du -sh $1 | cut -f1)"
fi
Comment on lines +28 to +35 (the torch-zipping block above)

Reply: NO
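For context on the torch-zipping block: the torch.zip it produces is only useful if something unpacks it again before the function imports torch. A hypothetical runtime counterpart (not part of this PR; the trigger point and paths are assumptions) could look like:

# Hypothetical sketch: unpack torch.zip at cold start, before importing torch.
# The zip entries keep the .python_packages/... prefix created by package.sh.
import os
import zipfile

TORCH_ZIP = "torch.zip"
TORCH_DIR = ".python_packages/lib/site-packages/torch"

if os.path.exists(TORCH_ZIP) and not os.path.isdir(TORCH_DIR):
    with zipfile.ZipFile(TORCH_ZIP) as archive:
        archive.extractall(".")
    os.remove(TORCH_ZIP)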
@@ -0,0 +1,3 @@
numpy==1.24.4
onnxruntime-gpu==1.16.3

Suggested change:
- onnxruntime-gpu==1.16.3
+ onnxruntime==1.16.3
Reply: I don't care
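Related to the onnxruntime-gpu vs. onnxruntime question: the installed package determines what ort.get_available_providers() reports, which is exactly what the provider-fallback suggestion for the handler keys on. A quick illustrative check (the example outputs are assumptions about typical environments):

# Illustrative check of the execution providers exposed by the installed build.
import onnxruntime as ort

print(ort.get_available_providers())
# onnxruntime-gpu on a GPU host: ['CUDAExecutionProvider', 'CPUExecutionProvider', ...]
# CPU-only onnxruntime:          ['CPUExecutionProvider']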
The same three-line requirements file is added in four more locations:

@@ -0,0 +1,3 @@
numpy==1.24.4
onnxruntime-gpu==1.16.3
tokenizers==0.13.3
@@ -86,7 +86,7 @@ def execute(cmd, cwd=None):
        execute(f"git pull", cwd=data_dir)
    # clone
    else:
-       execute(f"git clone https://github.com/spcl/serverless-benchmarks-data.git {data_dir}")
+       execute(f"git clone https://github.com/McLavish/serverless-benchmarks-data-dphpc.git {data_dir}")

Suggested change:
- execute(f"git clone https://github.com/McLavish/serverless-benchmarks-data-dphpc.git {data_dir}")
+ execute(f"git clone https://github.com/spcl/serverless-benchmarks-data.git {data_dir}")

Reply: who

Reviewer comment: The submodule URL change to a personal fork (McLavish/serverless-benchmarks-data-dphpc) may cause synchronization issues with the main project. If this fork contains essential data for the new benchmark, consider contributing it back to the upstream repository or documenting the dependency clearly in the project README.

Reply: asked