
Commit d9ed506

Merge pull request #11 from McLavish/feature/inference-recommender
added recommender benchmark
2 parents 074d4b7 + e154ba0

File tree

12 files changed: +198 −0 lines changed

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
{
    "timeout": 60,
    "memory": 1024,
    "languages": ["python"],
    "modules": ["storage"]
}
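Not part of the commit: a minimal sketch of how a benchmark driver might consume this config. The file name config.json, the driver code, and the units in the comments are assumptions; only the field names and values come from the diff above.

    import json

    # Hypothetical consumer of the benchmark config shown above.
    with open("config.json") as f:
        cfg = json.load(f)

    assert cfg["timeout"] == 60           # per-invocation timeout, assumed to be seconds
    assert cfg["memory"] == 1024          # memory allocation, assumed to be MB
    assert cfg["languages"] == ["python"]
    assert cfg["modules"] == ["storage"]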
Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
import os


def buckets_count():
    # Two input buckets (model and requests), zero output buckets.
    return (2, 0)


def upload_files(data_root, data_dir, upload_func):
    # Walk data_dir and upload every file into bucket 0, keyed by its path relative to data_root.
    for root, _, files in os.walk(data_dir):
        prefix = os.path.relpath(root, data_root)
        for file in files:
            upload_func(0, os.path.join(prefix, file), os.path.join(root, file))


def generate_input(
    data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func
):
    # Upload the serialized model to bucket 0 and the request batch to bucket 1.
    model_file = "dlrm_tiny.pt"
    upload_func(0, model_file, os.path.join(data_dir, "model", model_file))

    requests_file = "requests.jsonl"
    upload_func(1, requests_file, os.path.join(data_dir, "data", requests_file))

    # The returned config becomes the invocation event: object keys plus bucket and prefix names.
    cfg = {"object": {}, "bucket": {}}
    cfg["object"]["model"] = model_file
    cfg["object"]["requests"] = requests_file
    cfg["bucket"]["bucket"] = benchmarks_bucket
    cfg["bucket"]["model"] = input_paths[0]
    cfg["bucket"]["requests"] = input_paths[1]
    return cfg
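Not part of the commit: a sketch of calling generate_input locally with a stub upload function, to show which bucket index and key each artifact ends up under. The directory name, bucket name, and prefixes are made up; only the function signature and the model/ and data/ subdirectories come from the code above.

    # Hypothetical stand-in for the platform's upload function.
    def fake_upload(bucket_idx, key, filepath):
        print(f"bucket[{bucket_idx}] <- key={key} from {filepath}")

    cfg = generate_input(
        data_dir="dlrm-benchmark-data",                     # assumed local layout with model/ and data/
        size="test",
        benchmarks_bucket="my-benchmarks-bucket",           # made-up bucket name
        input_paths=["recommender/model", "recommender/requests"],
        output_paths=[],
        upload_func=fake_upload,
        nosql_func=None,
    )
    # cfg["object"] holds the file names, cfg["bucket"] the bucket and prefixes;
    # this dictionary is what the deployed handler later receives as its event.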
Lines changed: 144 additions & 0 deletions
@@ -0,0 +1,144 @@
import datetime
import json
import os
import uuid

import torch
import torch.nn as nn

from . import storage

client = storage.storage.get_instance()

MODEL_FILE = "dlrm_tiny.pt"
MODEL_CACHE = "/tmp/dlrm_gpu_model"

# Cached across warm invocations of the same function instance.
_model = None
_device = torch.device("cpu")


class TinyDLRM(nn.Module):
    def __init__(self, num_users, num_items, num_categories, embed_dim=8):
        super().__init__()
        self.user_emb = nn.Embedding(num_users, embed_dim)
        self.item_emb = nn.Embedding(num_items, embed_dim)
        self.category_emb = nn.Embedding(num_categories, embed_dim)
        # Three embeddings plus two dense features feed a small two-layer MLP.
        in_dim = embed_dim * 3 + 2
        hidden = 16
        self.mlp = nn.Sequential(
            nn.Linear(in_dim, hidden),
            nn.ReLU(),
            nn.Linear(hidden, 1),
        )

    def forward(self, user_id, item_id, category_id, dense):
        features = torch.cat(
            [
                self.user_emb(user_id),
                self.item_emb(item_id),
                self.category_emb(category_id),
                dense,
            ],
            dim=-1,
        )
        return torch.sigmoid(self.mlp(features))


def _select_device():
    # GPU benchmark: fail fast rather than silently falling back to CPU.
    if torch.cuda.is_available():
        return torch.device("cuda")
    raise RuntimeError("CUDA is not available")


def _load_model(bucket, prefix):
    global _model, _device

    if _model is not None:
        return 0.0, 0.0

    download_begin = datetime.datetime.now()
    os.makedirs(MODEL_CACHE, exist_ok=True)
    tmp_path = os.path.join("/tmp", f"{uuid.uuid4()}-{MODEL_FILE}")
    client.download(bucket, os.path.join(prefix, MODEL_FILE), tmp_path)
    download_end = datetime.datetime.now()

    process_begin = datetime.datetime.now()
    checkpoint = torch.load(tmp_path, map_location="cpu")
    meta = checkpoint["meta"]
    _device = _select_device()
    model = TinyDLRM(
        meta["num_users"], meta["num_items"], meta["num_categories"], meta["embed_dim"]
    )
    model.load_state_dict(checkpoint["state_dict"])
    model.to(_device)
    model.eval()
    _model = model
    os.remove(tmp_path)
    process_end = datetime.datetime.now()

    download_time = (download_end - download_begin) / datetime.timedelta(microseconds=1)
    process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1)
    return download_time, process_time


def _prepare_batch(requests):
    user_ids = torch.tensor([req["user_id"] for req in requests], dtype=torch.long, device=_device)
    item_ids = torch.tensor([req["item_id"] for req in requests], dtype=torch.long, device=_device)
    category_ids = torch.tensor(
        [req["category_id"] for req in requests], dtype=torch.long, device=_device
    )
    dense = torch.tensor(
        [req.get("dense", [0.0, 0.0]) for req in requests], dtype=torch.float32, device=_device
    )
    return user_ids, item_ids, category_ids, dense


def handler(event):
    bucket = event.get("bucket", {}).get("bucket")
    model_prefix = event.get("bucket", {}).get("model")
    requests_prefix = event.get("bucket", {}).get("requests")
    requests_key = event.get("object", {}).get("requests")

    # Fetch the request batch from storage.
    download_begin = datetime.datetime.now()
    req_path = os.path.join("/tmp", f"{uuid.uuid4()}-{os.path.basename(requests_key)}")
    client.download(bucket, os.path.join(requests_prefix, requests_key), req_path)
    download_end = datetime.datetime.now()

    model_download_time, model_process_time = _load_model(bucket, model_prefix)

    with open(req_path, "r") as f:
        payloads = [json.loads(line) for line in f if line.strip()]
    os.remove(req_path)

    inference_begin = datetime.datetime.now()
    user_ids, item_ids, category_ids, dense = _prepare_batch(payloads)

    with torch.no_grad():
        scores = _model(user_ids, item_ids, category_ids, dense).squeeze(-1).tolist()
    inference_end = datetime.datetime.now()

    predictions = []
    for req, score in zip(payloads, scores):
        predictions.append(
            {
                "user_id": req["user_id"],
                "item_id": req["item_id"],
                "category_id": req["category_id"],
                "score": score,
                "device": str(_device),
            }
        )

    download_time = (download_end - download_begin) / datetime.timedelta(microseconds=1)
    compute_time = (inference_end - inference_begin) / datetime.timedelta(microseconds=1)

    return {
        "result": {"predictions": predictions},
        "measurement": {
            "download_time": download_time + model_download_time,
            "compute_time": compute_time + model_process_time,
            "model_time": model_process_time,
            "model_download_time": model_download_time,
        },
    }
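Not part of the commit: a sketch of producing a checkpoint and request file that the handler above can consume. Only the checkpoint layout ("meta" plus "state_dict") and the JSON keys are taken from the code; the sizes, the example values, and the assumption that TinyDLRM is importable from this module are hypothetical.

    # Hypothetical offline preparation script; assumes TinyDLRM is importable.
    import json
    import torch

    meta = {"num_users": 100, "num_items": 100, "num_categories": 10, "embed_dim": 8}  # arbitrary sizes
    model = TinyDLRM(meta["num_users"], meta["num_items"], meta["num_categories"], meta["embed_dim"])
    torch.save({"meta": meta, "state_dict": model.state_dict()}, "dlrm_tiny.pt")

    # One JSON object per line; "dense" is optional and defaults to [0.0, 0.0] in _prepare_batch.
    with open("requests.jsonl", "w") as f:
        f.write(json.dumps({"user_id": 1, "item_id": 2, "category_id": 3, "dense": [0.5, 0.1]}) + "\n")
        f.write(json.dumps({"user_id": 4, "item_id": 5, "category_id": 6}) + "\n")

The event passed to handler would then be the dictionary returned by generate_input, which maps these file names to the benchmark bucket and its prefixes.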
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
#!/bin/bash

# No additional initialization required for the GPU recommendation benchmark.
Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
#!/bin/bash

PACKAGE_DIR=$1
echo "DLRM GPU package size $(du -sh "$PACKAGE_DIR" | cut -f1)"
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
torch==2.2.2
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
torch==2.2.2
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
torch==2.2.2
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
torch==2.2.2
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
torch==2.2.2
