Skip to content
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
80 commits
Select commit Hold shift + click to select a range
10174e9
feat(health): extend health endpoint with system info, error counts, …
BillChirico Feb 25, 2026
739e385
feat: add restartTracker utility
BillChirico Feb 25, 2026
82d9a09
feat: wire restart tracking into startup and graceful shutdown
BillChirico Feb 25, 2026
044771c
feat: install ws package for WebSocket log streaming
BillChirico Feb 25, 2026
b173cdb
feat: add WebSocketTransport custom Winston transport
BillChirico Feb 25, 2026
dcbc76c
test: add restartTracker tests and fix index test mock for package.js…
BillChirico Feb 25, 2026
96a063d
feat: add WebSocket log stream server with auth, filtering, and heart…
BillChirico Feb 25, 2026
1db9b26
docs: update TASK.md with results
BillChirico Feb 25, 2026
9347543
feat: wire WebSocket transport into server.js and logger.js
BillChirico Feb 25, 2026
4fe40c4
feat: add WebSocket transport to startup sequence
BillChirico Feb 25, 2026
bae1dbd
test: add WebSocket transport and log stream tests
BillChirico Feb 25, 2026
36cef93
chore: fix import order in logStream.js (biome lint)
BillChirico Feb 25, 2026
ab4f1fd
docs: update TASK.md with results
BillChirico Feb 25, 2026
446e046
merge: integrate restart tracker into feat/logs-and-health
BillChirico Feb 25, 2026
9b6fb0d
Merge branch 'feat/logs-and-health-health' into feat/logs-and-health
BillChirico Feb 25, 2026
45f908d
feat: add bot health API proxy route
BillChirico Feb 25, 2026
3c213c5
feat: add health cards component and shared types
BillChirico Feb 25, 2026
88f10c2
feat: add /api/log-stream/ws-ticket route for authenticated WS connec…
BillChirico Feb 25, 2026
c71f821
feat: add restart history table component
BillChirico Feb 25, 2026
ea06e14
feat: add useLogStream WebSocket hook with auto-reconnect
BillChirico Feb 25, 2026
8157ffe
feat: add health section orchestrator and logs page; fix gitignore fo…
BillChirico Feb 25, 2026
8ae516d
feat: add LogViewer terminal-style component with auto-scroll and met…
BillChirico Feb 25, 2026
0efdcf9
feat: add LogFilters component with level/module/search controls
BillChirico Feb 25, 2026
1095bf1
feat: add /dashboard/logs page route
BillChirico Feb 25, 2026
ede33e4
feat: add Logs link to dashboard sidebar navigation
BillChirico Feb 25, 2026
150af35
docs: update TASK.md with results
BillChirico Feb 25, 2026
0553d32
docs: update TASK.md with completed log UI results
BillChirico Feb 25, 2026
d00a5c3
merge: integrate health UI and log viewer into logs page
BillChirico Feb 25, 2026
e678912
fix(security): replace raw BOT_API_SECRET with short-lived HMAC ticket
BillChirico Feb 25, 2026
61589f7
fix: show CPU utilization % instead of raw cumulative seconds
BillChirico Feb 25, 2026
642ed56
fix(a11y): make log metadata toggle keyboard-accessible
BillChirico Feb 25, 2026
f5252d9
fix: flatten server metadata field into meta during normalization
BillChirico Feb 25, 2026
9c25e5f
fix: retry ticket-fetch failures with exponential backoff
BillChirico Feb 25, 2026
f88b176
chore: clean up .gitignore — remove redundant /logs/security/ and uni…
BillChirico Feb 25, 2026
51a1370
fix: await async handleAuth in WS message handler, add shutdown metadata
BillChirico Feb 25, 2026
e8f5f13
fix: improve bot-health error logging — show which env vars are missing
BillChirico Feb 25, 2026
f33207b
fix: lazy-load queryLogs in health route, wrap in try/catch
BillChirico Feb 25, 2026
167a06b
fix: pass sendFilter directly and disable filters until connected
BillChirico Feb 25, 2026
2075903
fix: coerce message to string before search filtering in WS transport
BillChirico Feb 25, 2026
8188397
fix: classify 300ms ping as yellow, not red
BillChirico Feb 25, 2026
e8af514
fix: self-heal getRestarts on missing table, add structured warn meta…
BillChirico Feb 25, 2026
38c7a2d
fix: skip redundant setLoading(false) on background health refresh
BillChirico Feb 25, 2026
eff5fc0
fix: correctly remove timed-out queue waiters in test helper
BillChirico Feb 25, 2026
4406415
fix: log shutdown uptime recording failures instead of swallowing sil…
BillChirico Feb 25, 2026
a14feca
fix: restart-history — remove dead code and fix 'start' matching 'res…
BillChirico Feb 25, 2026
3523768
fix: validate individual RestartRecord items in isBotHealth
BillChirico Feb 25, 2026
6e3e3c9
test: update restartTracker test for structured warn metadata
BillChirico Feb 25, 2026
1267ab2
docs: update TASK.md with results
BillChirico Feb 25, 2026
c80a471
docs: update TASK.md with results
BillChirico Feb 25, 2026
6d4e697
merge: integrate frontend fixes into feat/logs-and-health
BillChirico Feb 25, 2026
7d2df72
fix(types): nullable errors counts and RestartRecord fields + isBotHe…
BillChirico Feb 25, 2026
97c0381
fix: replace raw secret auth with HMAC ticket validation in WS logStream
BillChirico Feb 25, 2026
8b372e4
fix(health-cards): null-safe rendering for error counts
BillChirico Feb 25, 2026
5c12ac7
fix(restart-history): null-safe rendering for version and uptimeBefore
BillChirico Feb 25, 2026
9042fe5
fix(health-section): avoid error flicker during background refresh
BillChirico Feb 25, 2026
24bb60d
test: add self-heal tests for getRestarts 42P01 path
BillChirico Feb 25, 2026
9068859
fix(log-viewer): widen level badge to min-w-[3rem] to prevent label c…
BillChirico Feb 25, 2026
39c61a9
fix(log-ws): reset connectingRef in cleanup to unblock reconnect on r…
BillChirico Feb 25, 2026
ae9e095
fix: guard readFileSync for package.json and clean up orphaned WS tra…
BillChirico Feb 25, 2026
0de3058
fix: trivial improvements — tombstone, wsTransport reset, ws.OPEN con…
BillChirico Feb 25, 2026
bb3f807
chore: remove transient TASK.md work artifact
BillChirico Feb 25, 2026
a1d8b6d
Merge branch 'feat/logs-health-r2-frontend' into feat/logs-and-health
BillChirico Feb 25, 2026
56e707c
merge: resolve conflict with main (keep getPool + WS transport imports)
BillChirico Feb 25, 2026
85943f6
fix(health.js): use getRestarts/getPool, return RestartRecord[] array
BillChirico Feb 25, 2026
16c7868
fix(types): validate optional memory.rss in isBotHealth guard
BillChirico Feb 25, 2026
c7513d1
fix(logStream.js): type-guard ticket as string, use Number.isFinite f…
BillChirico Feb 25, 2026
8d35e6d
fix(websocket.js): exclude splat from metadata, normalize message to …
BillChirico Feb 25, 2026
4a1e99a
fix(health-section): use runtime locale, clear error on success, fix …
BillChirico Feb 25, 2026
d3c2474
fix(lib): extract shared formatUptime utility to format-time.ts
BillChirico Feb 25, 2026
80c056c
fix(health-cards): clamp CPU% to 0-100, use shared formatUptime
BillChirico Feb 25, 2026
929ae8d
fix(health.test.js): update restarts assertion to expect RestartRecor…
BillChirico Feb 25, 2026
f130ad7
fix(restart-history): sort DESC by timestamp, use runtime locale, use…
BillChirico Feb 25, 2026
de70d0b
fix(log-ws): add cache no-store to ticket fetch, fix connect race wit…
BillChirico Feb 25, 2026
00c82b1
Merge branch 'fix/pr87-frontend' into feat/logs-and-health
BillChirico Feb 25, 2026
808a723
fix(server): wrap setupLogStream in try/catch to prevent hanging Promise
BillChirico Feb 25, 2026
2719b92
fix(logStream): guard double-call + fix history race condition by reg…
BillChirico Feb 25, 2026
544bc48
fix(websocket): move EXCLUDED_KEYS to module scope to avoid per-call …
BillChirico Feb 25, 2026
13526e7
test(health): add queryLogs failure-path test
BillChirico Feb 25, 2026
c730a09
fix(health-cards): use runtime locale instead of hardcoded en-US
BillChirico Feb 25, 2026
3d9316b
fix(health-section): add router to useCallback dependency array
BillChirico Feb 25, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
node_modules/
.env
*.log
logs/
/logs/
coverage/
.next/

Expand All @@ -13,7 +13,7 @@ coverage/
.claude_settings.json
.worktrees/
.security-key
logs/security/
/logs/security/

# State persistence data (keep structure, ignore content)
data/*
Expand All @@ -38,3 +38,7 @@ web/.env.local
web/.env.*.local
web/tsconfig.tsbuildinfo

# Un-ignore Next.js route that happens to be named "logs"
!web/src/app/dashboard/logs/
!web/src/app/dashboard/logs/**

101 changes: 101 additions & 0 deletions TASK.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# Task: Health Cards + Restart History UI

## Parent
- **Master Task:** task-001
- **Branch:** feat/logs-and-health
- **Issue:** [#35](https://github.com/VolvoxLLC/volvox-bot/issues/35)

## Context

Build health metric cards and restart history table for the dashboard. Data comes from the extended health endpoint (`GET /api/v1/health`).

### Existing Code
- `web/src/lib/bot-api-proxy.ts` — API proxy for authenticated requests
- `web/src/components/dashboard/config-editor.tsx` — Reference for dashboard patterns
- `web/src/components/ui/` — shadcn/ui components (card, table, badge, etc.)
- Health endpoint returns: uptime, memory, discord, system, errors, restarts

## IMPORTANT — READ FIRST

1. **Commit after every file you create or major change**
2. **Start writing code IMMEDIATELY**
3. **Expected duration: ~15m**

**Commit flow:**
1. Create health cards component → commit
2. Create restart history component → commit
3. Create page or section → commit
4. Lint/build → commit

## Files to Create

- `web/src/components/dashboard/health-cards.tsx` — Health metric cards
- `web/src/components/dashboard/restart-history.tsx` — Restart log table
- `web/src/app/dashboard/logs/page.tsx` — Add health section (if page exists from log viewer slice, just add to it — otherwise create)

## Requirements

- [ ] **Health cards** (grid layout):
| Card | Data | Display |
|------|------|---------|
| Uptime | `health.uptime` | Human-readable ("3d 14h 22m") |
| Memory | `health.memory.heapUsed/heapTotal` | MB + percentage bar |
| Discord Ping | `health.discord.ping` | ms, color: green <100, yellow 100–300, red >300 |
| Guilds | `health.discord.guilds` | Count |
| Errors (1h) | `health.errors.lastHour` | Count, red if >0 |
| Errors (24h) | `health.errors.lastDay` | Count |
| CPU | `health.system.cpuUsage` | user + system % |
| Node | `health.system.nodeVersion` | Version string |
- [ ] **Restart history table**:
- Columns: timestamp, reason, version, uptime before restart
- Last 20 restarts from `health.restarts`
- Human-readable timestamps
- Color-coded reasons (startup=green, crash=red)
- [ ] Auto-refresh health data every 60s
- [ ] Loading skeleton while fetching
- [ ] Lint passes, build succeeds

## Constraints
- Do NOT touch backend files
- Do NOT touch log viewer (different slice)
- Use shadcn/ui Card, Table, Badge components
- Use Tailwind for styling

## Acceptance Criteria
- [ ] Health cards display all 8 metrics
- [ ] Color coding works for ping and errors
- [ ] Restart history table shows recent restarts
- [ ] Auto-refresh every 60s
- [ ] Loading state while fetching
- [ ] Lint + build pass

## Results

**Status:** ✅ Done

**Commits:**
- `45f908d` feat: add bot health API proxy route
- `3c213c5` feat: add health cards component and shared types
- `c71f821` feat: add restart history table component
- `8157ffe` feat: add health section orchestrator and logs page; fix gitignore for logs route

**Changes:**
- `web/src/app/api/bot-health/route.ts` — authenticated proxy to bot's `GET /api/v1/health`
- `web/src/components/dashboard/types.ts` — `BotHealth` / `RestartRecord` types + runtime validator
- `web/src/components/dashboard/health-cards.tsx` — 8-card grid (uptime, memory + bar, discord ping w/ color, guilds, errors 1h/24h w/ red, CPU, Node version)
- `web/src/components/dashboard/restart-history.tsx` — table with last 20 restarts, color-coded reason badges, human-readable timestamps
- `web/src/components/dashboard/health-section.tsx` — client component with auto-refresh (60s), loading skeleton, error banner, refresh button
- `web/src/app/dashboard/logs/page.tsx` — new `/dashboard/logs` route rendering `<HealthSection />`
- `.gitignore` — added exception for `web/src/app/dashboard/logs/` (conflicts with `logs/` gitignore rule)

**Build:** ✅ `next build` — compiled successfully, `/dashboard/logs` route created
**TypeCheck:** ✅ `tsc --noEmit` — no errors in new files
**Root lint:** Pre-existing failures in bot JS files only (26 errors existed before this task)

**Acceptance Criteria:**
- [x] Health cards display all 8 metrics
- [x] Color coding works for ping (green/yellow/red) and errors (red if >0)
- [x] Restart history table shows recent restarts (last 20, reversed)
- [x] Auto-refresh every 60s
- [x] Loading state while fetching (skeleton cards + table skeleton)
- [x] Lint + build pass (build ✅; the root lint failures are pre-existing and were not introduced by this task)
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
"pg": "^8.18.0",
"winston": "^3.19.0",
"winston-daily-rotate-file": "^5.0.0",
"winston-transport": "^4.9.0"
"winston-transport": "^4.9.0",
"ws": "^8.19.0"
},
"pnpm": {
"overrides": {
Expand Down
3 changes: 3 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

47 changes: 45 additions & 2 deletions src/api/routes/health.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,26 @@
*/

import { Router } from 'express';
import { queryLogs } from '../../utils/logQuery.js';
import { isValidSecret } from '../middleware/auth.js';

const router = Router();

// Graceful fallback for restartTracker — may not exist yet
let getRestartData = null;
try {
const mod = await import('../../utils/restartTracker.js');
getRestartData = mod.getRestartData ?? mod.default?.getRestartData ?? null;
} catch {
// restartTracker not available yet — fallback to null
}

/**
* GET / — Health check endpoint
* Returns status, uptime, and Discord connection details.
* Includes detailed memory usage only when a valid x-api-secret header is provided.
* Includes extended data only when a valid x-api-secret header is provided.
*/
router.get('/', (req, res) => {
router.get('/', async (req, res) => {
const { client } = req.app.locals;

// Defensive guard in case health check is hit before Discord login completes
Expand All @@ -38,6 +48,39 @@ router.get('/', (req, res) => {
guilds: client.guilds.cache.size,
};
body.memory = process.memoryUsage();

body.system = {
platform: process.platform,
nodeVersion: process.version,
cpuUsage: process.cpuUsage(),
};

// Error counts from logs table
const now = new Date();
const oneHourAgo = new Date(now.getTime() - 60 * 60 * 1000);
const oneDayAgo = new Date(now.getTime() - 24 * 60 * 60 * 1000);

const [hourResult, dayResult] = await Promise.all([
queryLogs({ level: 'error', since: oneHourAgo, limit: 1 }),
queryLogs({ level: 'error', since: oneDayAgo, limit: 1 }),
]);

body.errors = {
lastHour: hourResult.total,
lastDay: dayResult.total,
};

// Restart data with graceful fallback
if (getRestartData) {
try {
const restartInfo = await getRestartData();
body.restarts = restartInfo;
} catch {
body.restarts = { total: 0, last: null };
}
} else {
body.restarts = { total: 0, last: null };
}
}

res.json(body);
Expand Down
14 changes: 13 additions & 1 deletion src/api/server.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import apiRouter from './index.js';
import { rateLimit } from './middleware/rateLimit.js';
import { stopAuthCleanup } from './routes/auth.js';
import { stopGuildCacheCleanup } from './utils/discordApi.js';
import { setupLogStream, stopLogStream } from './ws/logStream.js';

/** @type {import('node:http').Server | null} */
let server = null;
Expand Down Expand Up @@ -84,9 +85,11 @@ export function createApp(client, dbPool) {
*
* @param {import('discord.js').Client} client - Discord client instance
* @param {import('pg').Pool | null} dbPool - PostgreSQL connection pool
* @param {Object} [options] - Additional options
* @param {import('../transports/websocket.js').WebSocketTransport} [options.wsTransport] - WebSocket transport for log streaming
* @returns {Promise<import('node:http').Server>} The HTTP server instance
*/
export async function startServer(client, dbPool) {
export async function startServer(client, dbPool, options = {}) {
if (server) {
warn('startServer called while a server is already running — closing orphaned server');
await stopServer();
Expand All @@ -108,6 +111,12 @@ export async function startServer(client, dbPool) {
return new Promise((resolve, reject) => {
server = app.listen(port, () => {
info('API server started', { port });

// Attach WebSocket log stream if transport provided
if (options.wsTransport) {
setupLogStream(server, options.wsTransport);
}

resolve(server);
});
server.once('error', (err) => {
Expand All @@ -124,6 +133,9 @@ export async function startServer(client, dbPool) {
* @returns {Promise<void>}
*/
export async function stopServer() {
// Stop WebSocket log stream before closing HTTP server
await stopLogStream();

stopAuthCleanup();
stopGuildCacheCleanup();

Expand Down
Loading