|
| 1 | +#!/usr/bin/env bash |
| 2 | +# archivist_bench.sh — Upload benchmark for archivist-node |
| 3 | +# |
| 4 | +# Usage: |
| 5 | +# ./archivist_bench.sh [OPTIONS] [SIZE...] |
| 6 | +# |
| 7 | +# Options: |
| 8 | +# --binary PATH Path to archivist binary (default: ../build/archivist) |
| 9 | +# --data-dir PATH Node data directory (default: /tmp/archivist-bench-data) |
| 10 | +# --api-port PORT REST API port (default: 8080) |
| 11 | +# --metrics-port PORT Metrics port (default: 8008) |
| 12 | +# --repo-kind KIND fs or sqlite (default: fs) |
| 13 | +# --num-threads N Worker threads, 0=auto (default: 0) |
| 14 | +# --direct-io Enable O_DIRECT |
| 15 | +# --no-fsync Disable fsync-file and fsync-dir |
| 16 | +# --runs N Repetitions per size (default: 1) |
| 17 | +# --output-dir PATH Results directory (default: ./results) |
| 18 | +# --skip-build Do not build the binary |
| 19 | +# |
| 20 | +# Sizes: 10GB, 100GB, 1TB (default: all three) |
| 21 | + |
set -euo pipefail

# Resolve this script's own directory so sourced helpers and relative
# defaults work regardless of the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# Shared benchmark helpers: log_info/log_error, size_label_to_bytes,
# ensure_test_file, check_disk_space, memory/CPU/metrics utilities and the
# result writers used below.
# NOTE(review): exact helper contracts live in bench_common.sh — confirm there.
# shellcheck source=bench_common.sh
source "${SCRIPT_DIR}/bench_common.sh"

# ---------- defaults ---------- (each overridable via the CLI flags below)
BINARY="${REPO_ROOT}/build/archivist"  # --binary
DATA_DIR="/tmp/archivist-bench-data"   # --data-dir (wiped per run!)
API_PORT=8080                          # --api-port
METRICS_PORT=8008                      # --metrics-port
REPO_KIND="fs"                         # --repo-kind (fs | sqlite)
NUM_THREADS=0                          # --num-threads (0 = auto)
DIRECT_IO=false                        # --direct-io
NO_FSYNC=false                         # --no-fsync
RUNS=1                                 # --runs (repetitions per size)
OUTPUT_DIR="${SCRIPT_DIR}/results"     # --output-dir
SKIP_BUILD=false                       # --skip-build
SIZES=()                               # positional size labels; empty = all
| 43 | + |
# ---------- parse arguments ----------

# Guard for options that take a value. Without this, passing e.g.
# "--binary" as the last argument would abort with set -u's cryptic
# '$2: unbound variable' instead of a usable error message.
require_value() {
  if [[ $# -lt 2 ]]; then
    log_error "Option $1 requires a value"
    exit 1
  fi
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --binary)       require_value "$@"; BINARY="$2"; shift 2 ;;
    --data-dir)     require_value "$@"; DATA_DIR="$2"; shift 2 ;;
    --api-port)     require_value "$@"; API_PORT="$2"; shift 2 ;;
    --metrics-port) require_value "$@"; METRICS_PORT="$2"; shift 2 ;;
    --repo-kind)    require_value "$@"; REPO_KIND="$2"; shift 2 ;;
    --num-threads)  require_value "$@"; NUM_THREADS="$2"; shift 2 ;;
    --direct-io)    DIRECT_IO=true; shift ;;
    --no-fsync)     NO_FSYNC=true; shift ;;
    --runs)         require_value "$@"; RUNS="$2"; shift 2 ;;
    --output-dir)   require_value "$@"; OUTPUT_DIR="$2"; shift 2 ;;
    --skip-build)   SKIP_BUILD=true; shift ;;
    --help|-h)
      # Print the header comment block (line 2 to first blank line) as usage.
      sed -n '2,/^$/s/^# \?//p' "$0"
      exit 0
      ;;
    10GB|100GB|1TB) SIZES+=("$1"); shift ;;
    *)
      log_error "Unknown option: $1"
      exit 1
      ;;
  esac
done

# Default to all sizes if none specified
if [[ ${#SIZES[@]} -eq 0 ]]; then
  SIZES=(10GB 100GB 1TB)
fi

mkdir -p "$OUTPUT_DIR"
| 76 | + |
# ---------- build ----------

# Compile the archivist binary in release mode.
# Globals:  SKIP_BUILD (read), REPO_ROOT (read), BINARY (read)
# Outputs:  progress via log_info
# Fix: the output path used to be hard-coded to build/archivist even when
# --binary pointed elsewhere, and the completion log then reported a path
# that was never written. The build now targets $BINARY itself.
build_binary() {
  if [[ "$SKIP_BUILD" == true ]]; then
    log_info "Skipping build (--skip-build)"
    return
  fi

  log_info "Building archivist (release)..."

  # Absolutize the output path so the `cd` in the subshell below does not
  # break a relative --binary argument.
  local out="$BINARY"
  [[ "$out" == /* ]] || out="$(pwd)/$out"
  mkdir -p "$(dirname "$out")"

  (
    cd "$REPO_ROOT"
    nim c \
      -d:release \
      -o:"$out" \
      archivist/archivist.nim
  )

  log_info "Build complete: ${out}"
}
| 97 | + |
# ---------- node lifecycle ----------
NODE_PID=""
MEM_MON_PID=""

# Send TERM to the given PID if it is non-empty and still alive, then reap
# it. All failures are ignored — this is best-effort teardown.
reap_pid() {
  local pid=$1
  [[ -n "$pid" ]] && kill -0 "$pid" 2>/dev/null || return 0
  kill "$pid" 2>/dev/null || true
  wait "$pid" 2>/dev/null || true
}

# Tear down the memory monitor first, then the node. Safe to call more
# than once; registered below so it fires on any exit path.
cleanup() {
  log_info "Cleaning up..."
  reap_pid "$MEM_MON_PID"
  MEM_MON_PID=""
  reap_pid "$NODE_PID"
  NODE_PID=""
}

trap cleanup EXIT INT TERM
| 117 | + |
# Start a fresh archivist node sized for the given upload and wait until
# its REST API answers.
# Arguments: $1 - upload size in bytes (drives the storage quota)
# Globals:   DATA_DIR, API_PORT, METRICS_PORT, REPO_KIND, NUM_THREADS,
#            DIRECT_IO, NO_FSYNC, BINARY (read); NODE_PID (written)
# Returns:   0 when the node is ready; 1 if it dies or times out.
start_node() {
  local file_size_bytes=$1

  # Storage quota = 120% of file size
  local quota=$(( file_size_bytes * 12 / 10 ))

  # ${DATA_DIR:?} aborts if the variable is ever empty/unset, so this can
  # never degenerate into `rm -rf ""`.
  rm -rf "${DATA_DIR:?}"
  mkdir -p "$DATA_DIR"

  local node_args=(
    "--data-dir=${DATA_DIR}"
    "--api-port=${API_PORT}"
    "--metrics"
    "--metrics-port=${METRICS_PORT}"
    "--repo-kind=${REPO_KIND}"
    "--storage-quota=${quota}"
    "--block-ttl=0"
    "--nat=none"
    "--log-level=WARN"
    "--num-threads=${NUM_THREADS}"
  )

  if [[ "$DIRECT_IO" == true ]]; then
    node_args+=("--fs-direct-io")
  fi

  if [[ "$NO_FSYNC" == true ]]; then
    node_args+=("--fs-fsync-file=false" "--fs-fsync-dir=false")
  fi

  log_info "Starting archivist node..."
  log_info "  ${BINARY} ${node_args[*]}"

  "$BINARY" "${node_args[@]}" &
  NODE_PID=$!

  # Poll the debug endpoint until it responds, bailing out early if the
  # node process dies or the deadline passes.
  log_info "Waiting for node to be ready (PID=${NODE_PID})..."
  local readiness_timeout_s=60
  local deadline
  deadline=$(( $(date +%s) + readiness_timeout_s ))
  while true; do
    if curl -sf "http://127.0.0.1:${API_PORT}/api/archivist/v1/debug/info" > /dev/null 2>&1; then
      log_info "Node is ready"
      break
    fi
    if ! kill -0 "$NODE_PID" 2>/dev/null; then
      log_error "Node process died during startup"
      return 1
    fi
    if (( $(date +%s) > deadline )); then
      log_error "Node readiness timeout (${readiness_timeout_s}s)"
      return 1
    fi
    sleep 0.5
  done
}
| 173 | + |
# Gracefully stop the node (TERM + reap) and remove its data directory.
# Idempotent: a second call is a no-op.
stop_node() {
  if [[ -n "$NODE_PID" ]] && kill -0 "$NODE_PID" 2>/dev/null; then
    log_info "Stopping node (PID=${NODE_PID})..."
    kill "$NODE_PID" 2>/dev/null || true
    wait "$NODE_PID" 2>/dev/null || true
    NODE_PID=""
  fi
  # Reclaim the stored blocks between runs.
  [[ ! -d "$DATA_DIR" ]] || rm -rf "$DATA_DIR"
}
| 185 | + |
# ---------- upload ----------

# POST the test file to the node's data endpoint, streaming via stdin so
# curl never buffers the whole file in memory. Prints the returned CID on
# stdout.
# Fix: --fail makes curl exit non-zero on an HTTP 4xx/5xx instead of
# printing the error body, which the caller would otherwise mistake for a
# valid CID. -S still surfaces the error on stderr despite -s.
upload_file() {
  local test_file=$1
  curl -sS --fail -X POST \
    -H "Content-Type: application/octet-stream" \
    -H "Expect:" \
    --data-binary @- \
    "http://127.0.0.1:${API_PORT}/api/archivist/v1/data" \
    < "$test_file"
}
| 197 | + |
# ---------- main benchmark loop ----------

#######################################
# Run one complete upload benchmark: boot a fresh node, upload one test
# file, collect wall/CPU/memory/phase metrics, and persist JSON + CSV
# results.
# Globals:   RUNS, DATA_DIR, OUTPUT_DIR, METRICS_PORT (read);
#            MEM_MON_PID (written)
# Arguments: $1 - size label (10GB|100GB|1TB), $2 - run number (1-based)
# Outputs:   result_*.json, summary.csv, mem_*.csv and metrics snapshots
#            under $OUTPUT_DIR
# Returns:   non-zero if the upload yields an empty CID.
# NOTE(review): helpers (size_label_to_bytes, ensure_test_file,
# check_disk_space, start_memory_monitor, scrape_prometheus_metrics,
# read_cpu_ticks, ticks_to_seconds, compute_memory_stats,
# compute_delta_metric, collect_system_info, write_result_json,
# write_csv_header, write_csv_row) come from bench_common.sh — confirm
# their contracts there.
#######################################
run_benchmark() {
  local label=$1 run_num=$2

  log_info "=========================================="
  log_info "Benchmark: ${label} — run ${run_num}/${RUNS}"
  log_info "=========================================="

  local file_size_bytes
  file_size_bytes=$(size_label_to_bytes "$label")

  # Ensure test file exists
  local test_file
  test_file=$(ensure_test_file "$label")
  log_info "Test file: ${test_file}"

  # Check disk space for stored data (1.2× file size)
  local data_dir_parent
  data_dir_parent=$(dirname "$DATA_DIR")
  local needed_store=$(( file_size_bytes * 12 / 10 ))
  check_disk_space "$needed_store" "$data_dir_parent"

  # Start node
  start_node "$file_size_bytes"

  # Unique tag per run so repeated runs never clobber each other's files.
  local run_tag="${label}_run${run_num}_$(date +%s)"
  local mem_csv="${OUTPUT_DIR}/mem_${run_tag}.csv"
  local metrics_before="${OUTPUT_DIR}/metrics_before_${run_tag}.txt"
  local metrics_after="${OUTPUT_DIR}/metrics_after_${run_tag}.txt"
  local result_json="${OUTPUT_DIR}/result_${run_tag}.json"

  # Start memory monitor (samples the node's RSS every 1s into mem_csv;
  # runs in the background — its PID is reaped below and by cleanup()).
  MEM_MON_PID=$(start_memory_monitor "$NODE_PID" "$mem_csv" 1)
  log_info "Memory monitor PID=${MEM_MON_PID} → ${mem_csv}"

  # Snapshot Prometheus before
  local metrics_url="http://127.0.0.1:${METRICS_PORT}/metrics"
  scrape_prometheus_metrics "$metrics_url" "$metrics_before"

  # Read CPU ticks before (expected output: "utime stime" for the node PID)
  local cpu_before
  cpu_before=$(read_cpu_ticks "$NODE_PID")
  local utime_before stime_before
  utime_before=$(echo "$cpu_before" | awk '{print $1}')
  stime_before=$(echo "$cpu_before" | awk '{print $2}')

  # Upload — the timed section covers exactly the single POST request.
  log_info "Starting upload of ${label}..."
  local wall_start wall_end
  wall_start=$(date +%s.%N)

  local cid
  cid=$(upload_file "$test_file")

  wall_end=$(date +%s.%N)

  if [[ -z "$cid" ]]; then
    log_error "Upload returned empty CID"
    stop_node
    return 1
  fi
  log_info "Upload complete — CID: ${cid}"

  # Read CPU ticks after
  local cpu_after
  cpu_after=$(read_cpu_ticks "$NODE_PID")
  local utime_after stime_after
  utime_after=$(echo "$cpu_after" | awk '{print $1}')
  stime_after=$(echo "$cpu_after" | awk '{print $2}')

  # Snapshot Prometheus after
  scrape_prometheus_metrics "$metrics_url" "$metrics_after"

  # Stop memory monitor (stop sampling before the node itself is stopped)
  if [[ -n "$MEM_MON_PID" ]] && kill -0 "$MEM_MON_PID" 2>/dev/null; then
    kill "$MEM_MON_PID" 2>/dev/null || true
    wait "$MEM_MON_PID" 2>/dev/null || true
    MEM_MON_PID=""
  fi

  # ---------- compute results ----------

  # Wall time & throughput — awk does the float math bash can't; the shell
  # values are interpolated into the awk program text, which is safe here
  # because they are numeric timestamps/sizes.
  local upload_time_s throughput_mbps
  upload_time_s=$(awk "BEGIN {printf \"%.2f\", ${wall_end} - ${wall_start}}")
  throughput_mbps=$(awk "BEGIN {
    t = ${wall_end} - ${wall_start}
    if (t > 0) printf \"%.1f\", (${file_size_bytes} / 1048576) / t
    else print 0
  }")

  # Memory (peak and average RSS in MB, derived from the monitor's CSV)
  local mem_stats peak_memory_mb avg_memory_mb
  mem_stats=$(compute_memory_stats "$mem_csv")
  peak_memory_mb=$(echo "$mem_stats" | awk '{print $1}')
  avg_memory_mb=$(echo "$mem_stats" | awk '{print $2}')

  # CPU — deltas in clock ticks, converted to seconds
  local utime_delta stime_delta cpu_user_s cpu_system_s
  utime_delta=$(( utime_after - utime_before ))
  stime_delta=$(( stime_after - stime_before ))
  cpu_user_s=$(ticks_to_seconds "$utime_delta")
  cpu_system_s=$(ticks_to_seconds "$stime_delta")

  # Phase breakdown from Prometheus (per-phase duration deltas during the
  # upload window)
  local phase_read phase_hash phase_write phase_tree phase_proofs
  phase_read=$(compute_delta_metric "$metrics_before" "$metrics_after" "archivist_upload_read_duration_seconds")
  phase_hash=$(compute_delta_metric "$metrics_before" "$metrics_after" "archivist_upload_hash_duration_seconds")
  phase_write=$(compute_delta_metric "$metrics_before" "$metrics_after" "archivist_upload_write_duration_seconds")
  phase_tree=$(compute_delta_metric "$metrics_before" "$metrics_after" "archivist_upload_tree_duration_seconds")
  phase_proofs=$(compute_delta_metric "$metrics_before" "$metrics_after" "archivist_upload_proofs_duration_seconds")

  # Config JSON (block_size 65536 is assumed fixed by the node — TODO
  # confirm it matches the node's actual default)
  local config_json
  config_json=$(cat <<EOF
{
  "block_size": 65536,
  "repo_kind": "${REPO_KIND}",
  "num_threads": ${NUM_THREADS},
  "direct_io": ${DIRECT_IO},
  "no_fsync": ${NO_FSYNC}
  }
EOF
  )

  # System JSON
  local system_json
  system_json=$(collect_system_info)

  # Print summary
  log_info "--- Results: ${label} run ${run_num} ---"
  log_info "  Upload time:  ${upload_time_s}s"
  log_info "  Throughput:   ${throughput_mbps} MB/s"
  log_info "  Peak memory:  ${peak_memory_mb} MB"
  log_info "  Avg memory:   ${avg_memory_mb} MB"
  log_info "  CPU user:     ${cpu_user_s}s"
  log_info "  CPU system:   ${cpu_system_s}s"
  log_info "  CID:          ${cid}"

  # Write result JSON
  write_result_json "$result_json" \
    "archivist" "$label" "$file_size_bytes" \
    "$upload_time_s" "$throughput_mbps" \
    "$peak_memory_mb" "$avg_memory_mb" \
    "$cpu_user_s" "$cpu_system_s" \
    "$phase_read" "$phase_hash" "$phase_write" "$phase_tree" "$phase_proofs" \
    "$cid" "$config_json" "$system_json"

  # Write CSV row (header writer is presumably a no-op if the file already
  # exists — verify in bench_common.sh)
  local summary_csv="${OUTPUT_DIR}/summary.csv"
  write_csv_header "$summary_csv"
  write_csv_row "$summary_csv" \
    "archivist" "$label" "$file_size_bytes" \
    "$upload_time_s" "$throughput_mbps" \
    "$peak_memory_mb" "$avg_memory_mb" \
    "$cpu_user_s" "$cpu_system_s" \
    "$phase_read" "$phase_hash" "$phase_write" "$phase_tree" "$phase_proofs" \
    "$cid"

  # Stop node & clean data dir
  stop_node
}
| 360 | + |
# ---------- main ----------

# Entry point: announce configuration, build (unless skipped), sanity-check
# the binary, then run every requested size the requested number of times.
main() {
  log_info "Archivist Upload Benchmark"
  log_info "Sizes: ${SIZES[*]}"
  log_info "Runs per size: ${RUNS}"
  log_info "Output: ${OUTPUT_DIR}"

  # Build
  build_binary

  if [[ ! -x "$BINARY" ]]; then
    log_error "Binary not found or not executable: ${BINARY}"
    exit 1
  fi

  # Run benchmarks: outer loop over sizes, inner loop over repetitions.
  local label run
  for label in "${SIZES[@]}"; do
    run=1
    while (( run <= RUNS )); do
      run_benchmark "$label" "$run"
      run=$(( run + 1 ))
    done
  done

  log_info "=========================================="
  log_info "All benchmarks complete!"
  log_info "Results in: ${OUTPUT_DIR}"
  log_info "=========================================="
}

main
0 commit comments