Skip to content

Commit 871859f

Browse files
jessiebrokeclaude authored and committed
feat: add upload benchmark suite for 10GB/100GB/1TB files
Introduce shell-based benchmark harness to measure upload performance and memory footprint across large file sizes, with tool-agnostic JSON results for future comparison against IPFS and BitTorrent. Co-Authored-By: Claude Opus 4.6 <[email protected]>
1 parent cec0aae commit 871859f

4 files changed

Lines changed: 854 additions & 0 deletions

File tree

benchmarks/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
ceremony
22
circuit_bench_*
3+
results/
4+
testdata/

benchmarks/archivist_bench.sh

Lines changed: 389 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,389 @@
1+
#!/usr/bin/env bash
2+
# archivist_bench.sh — Upload benchmark for archivist-node
3+
#
4+
# Usage:
5+
# ./archivist_bench.sh [OPTIONS] [SIZE...]
6+
#
7+
# Options:
8+
# --binary PATH Path to archivist binary (default: ../build/archivist)
9+
# --data-dir PATH Node data directory (default: /tmp/archivist-bench-data)
10+
# --api-port PORT REST API port (default: 8080)
11+
# --metrics-port PORT Metrics port (default: 8008)
12+
# --repo-kind KIND fs or sqlite (default: fs)
13+
# --num-threads N Worker threads, 0=auto (default: 0)
14+
# --direct-io Enable O_DIRECT
15+
# --no-fsync Disable fsync-file and fsync-dir
16+
# --runs N Repetitions per size (default: 1)
17+
# --output-dir PATH Results directory (default: ./results)
18+
# --skip-build Do not build the binary
19+
#
20+
# Sizes: 10GB, 100GB, 1TB (default: all three)
21+
22+
set -euo pipefail
23+
24+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
25+
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
26+
27+
# shellcheck source=bench_common.sh
28+
source "${SCRIPT_DIR}/bench_common.sh"
29+
30+
# ---------- defaults ----------
31+
BINARY="${REPO_ROOT}/build/archivist"
32+
DATA_DIR="/tmp/archivist-bench-data"
33+
API_PORT=8080
34+
METRICS_PORT=8008
35+
REPO_KIND="fs"
36+
NUM_THREADS=0
37+
DIRECT_IO=false
38+
NO_FSYNC=false
39+
RUNS=1
40+
OUTPUT_DIR="${SCRIPT_DIR}/results"
41+
SKIP_BUILD=false
42+
SIZES=()
43+
44+
# ---------- parse arguments ----------

# need_val FLAG COUNT — abort with a clear message when a value-taking FLAG
# is the last word on the command line. Without this, "$2"/`shift 2` dies
# with a cryptic unbound-variable / shift error under `set -eu`.
need_val() {
  if (( $2 < 2 )); then
    log_error "Option $1 requires a value"
    exit 1
  fi
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --binary)       need_val "$1" $#; BINARY="$2"; shift 2 ;;
    --data-dir)     need_val "$1" $#; DATA_DIR="$2"; shift 2 ;;
    --api-port)     need_val "$1" $#; API_PORT="$2"; shift 2 ;;
    --metrics-port) need_val "$1" $#; METRICS_PORT="$2"; shift 2 ;;
    --repo-kind)    need_val "$1" $#; REPO_KIND="$2"; shift 2 ;;
    --num-threads)  need_val "$1" $#; NUM_THREADS="$2"; shift 2 ;;
    --direct-io)    DIRECT_IO=true; shift ;;
    --no-fsync)     NO_FSYNC=true; shift ;;
    --runs)         need_val "$1" $#; RUNS="$2"; shift 2 ;;
    --output-dir)   need_val "$1" $#; OUTPUT_DIR="$2"; shift 2 ;;
    --skip-build)   SKIP_BUILD=true; shift ;;
    --help|-h)
      # Print the header comment (lines 2..first blank line) as usage text.
      sed -n '2,/^$/s/^# \?//p' "$0"
      exit 0
      ;;
    10GB|100GB|1TB) SIZES+=("$1"); shift ;;
    *)
      log_error "Unknown option: $1"
      exit 1
      ;;
  esac
done

# Default to all sizes if none specified
if [[ ${#SIZES[@]} -eq 0 ]]; then
  SIZES=(10GB 100GB 1TB)
fi

mkdir -p "$OUTPUT_DIR"
76+
77+
# ---------- build ----------
# Build the archivist binary in release mode unless --skip-build was given.
# FIX: the output path now honors $BINARY (--binary PATH). Previously the
# build always wrote to build/archivist, so with a custom --binary the
# "Build complete" log and the later -x "$BINARY" check referred to a path
# that was never built.
build_binary() {
  if [[ "$SKIP_BUILD" == true ]]; then
    log_info "Skipping build (--skip-build)"
    return
  fi

  log_info "Building archivist (release)..."
  mkdir -p "$(dirname "$BINARY")"

  (
    cd "$REPO_ROOT"
    nim c \
      -d:release \
      -o:"$BINARY" \
      archivist/archivist.nim
  )

  log_info "Build complete: ${BINARY}"
}
97+
98+
# ---------- node lifecycle ----------
NODE_PID=""
MEM_MON_PID=""

# reap_pid VAR_NAME — if the variable names a live process, TERM it, reap it,
# and blank the variable; silently a no-op when empty or already gone.
reap_pid() {
  local var=$1 pid=${!1}
  [[ -n "$pid" ]] && kill -0 "$pid" 2>/dev/null || return 0
  kill "$pid" 2>/dev/null || true
  wait "$pid" 2>/dev/null || true
  printf -v "$var" ''
}

# Terminate the memory monitor first, then the node itself.
cleanup() {
  log_info "Cleaning up..."
  reap_pid MEM_MON_PID
  reap_pid NODE_PID
}

trap cleanup EXIT INT TERM
117+
118+
start_node() {
119+
local file_size_bytes=$1
120+
121+
# Storage quota = 120% of file size
122+
local quota=$(( file_size_bytes * 12 / 10 ))
123+
124+
rm -rf "$DATA_DIR"
125+
mkdir -p "$DATA_DIR"
126+
127+
local node_args=(
128+
"--data-dir=${DATA_DIR}"
129+
"--api-port=${API_PORT}"
130+
"--metrics"
131+
"--metrics-port=${METRICS_PORT}"
132+
"--repo-kind=${REPO_KIND}"
133+
"--storage-quota=${quota}"
134+
"--block-ttl=0"
135+
"--nat=none"
136+
"--log-level=WARN"
137+
"--num-threads=${NUM_THREADS}"
138+
)
139+
140+
if [[ "$DIRECT_IO" == true ]]; then
141+
node_args+=("--fs-direct-io")
142+
fi
143+
144+
if [[ "$NO_FSYNC" == true ]]; then
145+
node_args+=("--fs-fsync-file=false" "--fs-fsync-dir=false")
146+
fi
147+
148+
log_info "Starting archivist node..."
149+
log_info " ${BINARY} ${node_args[*]}"
150+
151+
"$BINARY" "${node_args[@]}" &
152+
NODE_PID=$!
153+
154+
# Wait for node readiness
155+
log_info "Waiting for node to be ready (PID=${NODE_PID})..."
156+
local deadline=$(( $(date +%s) + 60 ))
157+
while true; do
158+
if curl -sf "http://127.0.0.1:${API_PORT}/api/archivist/v1/debug/info" > /dev/null 2>&1; then
159+
log_info "Node is ready"
160+
break
161+
fi
162+
if ! kill -0 "$NODE_PID" 2>/dev/null; then
163+
log_error "Node process died during startup"
164+
return 1
165+
fi
166+
if (( $(date +%s) > deadline )); then
167+
log_error "Node readiness timeout (60s)"
168+
return 1
169+
fi
170+
sleep 0.5
171+
done
172+
}
173+
174+
# Gracefully stop the node (TERM + reap) and remove its data directory.
# Safe to call when the node is already gone; always returns 0.
stop_node() {
  local pid=$NODE_PID
  if [[ -n "$pid" ]] && kill -0 "$pid" 2>/dev/null; then
    log_info "Stopping node (PID=${pid})..."
    { kill "$pid" && wait "$pid"; } 2>/dev/null || true
    NODE_PID=""
  fi
  if [[ -d "$DATA_DIR" ]]; then
    rm -rf "$DATA_DIR"
  fi
}
185+
186+
# ---------- upload ----------
# Upload one file to the node's data endpoint; prints the resulting CID on
# stdout. The payload is streamed from stdin so curl never buffers the whole
# (potentially 1TB) file in memory.
upload_file() {
  local src=$1
  local -a curl_args=(
    -s
    -X POST
    -H "Content-Type: application/octet-stream"
    -H "Expect:"
    --data-binary @-
    "http://127.0.0.1:${API_PORT}/api/archivist/v1/data"
  )
  curl "${curl_args[@]}" < "$src"
}
197+
198+
# ---------- main benchmark loop ----------
#######################################
# Run one upload benchmark: start a fresh node, upload the test file for the
# given size label while sampling memory, CPU ticks, and Prometheus metrics,
# then write a JSON result file plus a summary-CSV row and tear the node down.
# Arguments:
#   $1 - size label (10GB | 100GB | 1TB)
#   $2 - run number (1..RUNS)
# Globals: reads RUNS, OUTPUT_DIR, DATA_DIR, METRICS_PORT, REPO_KIND,
#          NUM_THREADS, DIRECT_IO, NO_FSYNC, NODE_PID; writes MEM_MON_PID.
# Returns: 0 on success; 1 when the upload yields no CID.
#######################################
run_benchmark() {
  local label=$1 run_num=$2

  log_info "=========================================="
  log_info "Benchmark: ${label} — run ${run_num}/${RUNS}"
  log_info "=========================================="

  local file_size_bytes
  file_size_bytes=$(size_label_to_bytes "$label")

  # Ensure test file exists
  local test_file
  test_file=$(ensure_test_file "$label")
  log_info "Test file: ${test_file}"

  # Check disk space for stored data (1.2× file size)
  local data_dir_parent
  data_dir_parent=$(dirname "$DATA_DIR")
  local needed_store=$(( file_size_bytes * 12 / 10 ))
  check_disk_space "$needed_store" "$data_dir_parent"

  # Start node
  start_node "$file_size_bytes"

  # Declare separately from the assignment so the $(date) exit status is not
  # masked by `local` (SC2155).
  local run_tag
  run_tag="${label}_run${run_num}_$(date +%s)"
  local mem_csv="${OUTPUT_DIR}/mem_${run_tag}.csv"
  local metrics_before="${OUTPUT_DIR}/metrics_before_${run_tag}.txt"
  local metrics_after="${OUTPUT_DIR}/metrics_after_${run_tag}.txt"
  local result_json="${OUTPUT_DIR}/result_${run_tag}.json"

  # Start memory monitor
  MEM_MON_PID=$(start_memory_monitor "$NODE_PID" "$mem_csv" 1)
  # FIX: PID and CSV path were concatenated with no separator in this log.
  log_info "Memory monitor PID=${MEM_MON_PID}, log: ${mem_csv}"

  # Snapshot Prometheus before
  local metrics_url="http://127.0.0.1:${METRICS_PORT}/metrics"
  scrape_prometheus_metrics "$metrics_url" "$metrics_before"

  # Read CPU ticks before (two whitespace-separated fields: utime stime)
  local cpu_before
  cpu_before=$(read_cpu_ticks "$NODE_PID")
  local utime_before stime_before
  utime_before=$(echo "$cpu_before" | awk '{print $1}')
  stime_before=$(echo "$cpu_before" | awk '{print $2}')

  # Upload
  log_info "Starting upload of ${label}..."
  local wall_start wall_end
  wall_start=$(date +%s.%N)

  local cid
  cid=$(upload_file "$test_file")

  wall_end=$(date +%s.%N)

  if [[ -z "$cid" ]]; then
    log_error "Upload returned empty CID"
    stop_node
    return 1
  fi
  log_info "Upload complete — CID: ${cid}"

  # Read CPU ticks after
  local cpu_after
  cpu_after=$(read_cpu_ticks "$NODE_PID")
  local utime_after stime_after
  utime_after=$(echo "$cpu_after" | awk '{print $1}')
  stime_after=$(echo "$cpu_after" | awk '{print $2}')

  # Snapshot Prometheus after
  scrape_prometheus_metrics "$metrics_url" "$metrics_after"

  # Stop memory monitor
  if [[ -n "$MEM_MON_PID" ]] && kill -0 "$MEM_MON_PID" 2>/dev/null; then
    kill "$MEM_MON_PID" 2>/dev/null || true
    wait "$MEM_MON_PID" 2>/dev/null || true
    MEM_MON_PID=""
  fi

  # ---------- compute results ----------

  # Wall time & throughput (awk for float arithmetic; bash has none)
  local upload_time_s throughput_mbps
  upload_time_s=$(awk "BEGIN {printf \"%.2f\", ${wall_end} - ${wall_start}}")
  throughput_mbps=$(awk "BEGIN {
    t = ${wall_end} - ${wall_start}
    if (t > 0) printf \"%.1f\", (${file_size_bytes} / 1048576) / t
    else print 0
  }")

  # Memory (helper emits "peak avg" in MB)
  local mem_stats peak_memory_mb avg_memory_mb
  mem_stats=$(compute_memory_stats "$mem_csv")
  peak_memory_mb=$(echo "$mem_stats" | awk '{print $1}')
  avg_memory_mb=$(echo "$mem_stats" | awk '{print $2}')

  # CPU
  local utime_delta stime_delta cpu_user_s cpu_system_s
  utime_delta=$(( utime_after - utime_before ))
  stime_delta=$(( stime_after - stime_before ))
  cpu_user_s=$(ticks_to_seconds "$utime_delta")
  cpu_system_s=$(ticks_to_seconds "$stime_delta")

  # Phase breakdown from Prometheus
  local phase_read phase_hash phase_write phase_tree phase_proofs
  phase_read=$(compute_delta_metric "$metrics_before" "$metrics_after" "archivist_upload_read_duration_seconds")
  phase_hash=$(compute_delta_metric "$metrics_before" "$metrics_after" "archivist_upload_hash_duration_seconds")
  phase_write=$(compute_delta_metric "$metrics_before" "$metrics_after" "archivist_upload_write_duration_seconds")
  phase_tree=$(compute_delta_metric "$metrics_before" "$metrics_after" "archivist_upload_tree_duration_seconds")
  phase_proofs=$(compute_delta_metric "$metrics_before" "$metrics_after" "archivist_upload_proofs_duration_seconds")

  # Config JSON (block_size 65536 mirrors the node default —
  # NOTE(review): confirm against the node's actual block size setting)
  local config_json
  config_json=$(cat <<EOF
{
  "block_size": 65536,
  "repo_kind": "${REPO_KIND}",
  "num_threads": ${NUM_THREADS},
  "direct_io": ${DIRECT_IO},
  "no_fsync": ${NO_FSYNC}
}
EOF
)

  # System JSON
  local system_json
  system_json=$(collect_system_info)

  # Print summary
  log_info "--- Results: ${label} run ${run_num} ---"
  log_info "  Upload time: ${upload_time_s}s"
  log_info "  Throughput: ${throughput_mbps} MB/s"
  log_info "  Peak memory: ${peak_memory_mb} MB"
  log_info "  Avg memory: ${avg_memory_mb} MB"
  log_info "  CPU user: ${cpu_user_s}s"
  log_info "  CPU system: ${cpu_system_s}s"
  log_info "  CID: ${cid}"

  # Write result JSON
  write_result_json "$result_json" \
    "archivist" "$label" "$file_size_bytes" \
    "$upload_time_s" "$throughput_mbps" \
    "$peak_memory_mb" "$avg_memory_mb" \
    "$cpu_user_s" "$cpu_system_s" \
    "$phase_read" "$phase_hash" "$phase_write" "$phase_tree" "$phase_proofs" \
    "$cid" "$config_json" "$system_json"

  # Write CSV row (header helper is expected to be idempotent)
  local summary_csv="${OUTPUT_DIR}/summary.csv"
  write_csv_header "$summary_csv"
  write_csv_row "$summary_csv" \
    "archivist" "$label" "$file_size_bytes" \
    "$upload_time_s" "$throughput_mbps" \
    "$peak_memory_mb" "$avg_memory_mb" \
    "$cpu_user_s" "$cpu_system_s" \
    "$phase_read" "$phase_hash" "$phase_write" "$phase_tree" "$phase_proofs" \
    "$cid"

  # Stop node & clean data dir
  stop_node
}
360+
361+
# ---------- main ----------
# Entry point: build the binary (unless --skip-build), verify it is
# executable, then run every requested size RUNS times.
main() {
  log_info "Archivist Upload Benchmark"
  log_info "Sizes: ${SIZES[*]}"
  log_info "Runs per size: ${RUNS}"
  log_info "Output: ${OUTPUT_DIR}"

  build_binary

  # Guard clause: refuse to continue without a usable binary.
  [[ -x "$BINARY" ]] || {
    log_error "Binary not found or not executable: ${BINARY}"
    exit 1
  }

  # Outer loop over sizes, inner loop over repetitions.
  local size_label rep
  for size_label in "${SIZES[@]}"; do
    for (( rep = 1; rep <= RUNS; rep++ )); do
      run_benchmark "$size_label" "$rep"
    done
  done

  log_info "=========================================="
  log_info "All benchmarks complete!"
  log_info "Results in: ${OUTPUT_DIR}"
  log_info "=========================================="
}
388+
389+
main

0 commit comments

Comments
 (0)