diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index cd8ef43dd..164a758df 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -13,11 +13,13 @@ permissions: jobs: ci: - name: format + typecheck + test + name: format + sast + typecheck + test runs-on: ubuntu-latest steps: - name: Checkout repository uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Setup Bun uses: ./.github/actions/setup-bun @@ -30,6 +32,39 @@ jobs: - name: Format check run: bun prettier --check "packages/opencode/src/**/*.ts" "packages/plugin/src/**/*.ts" + - name: SAST check + run: | + ERRORS=0 + SRC_FILES=$(find packages/opencode/src -name '*.ts' -o -name '*.tsx' | grep -v node_modules | grep -v '\.test\.' | grep -v '\.snap') + if grep -rn '\beval(' $SRC_FILES 2>/dev/null | grep -v '// sast-ignore'; then + echo "SAST ERROR: eval() detected"; ERRORS=$((ERRORS + 1)) + fi + if grep -rn 'new Function(' $SRC_FILES 2>/dev/null | grep -v '// sast-ignore'; then + echo "SAST ERROR: new Function() detected"; ERRORS=$((ERRORS + 1)) + fi + if grep -rnE "(password|secret|token|api[_-]?key)\s*[:=]\s*['\"][A-Za-z0-9_/+=]{8,}['\"]" $SRC_FILES 2>/dev/null | grep -v '// sast-ignore' | grep -v test | grep -v example | grep -v placeholder; then + echo "SAST ERROR: Possible hardcoded secret"; ERRORS=$((ERRORS + 1)) + fi + if [ "$ERRORS" -gt 0 ]; then echo "SAST: $ERRORS error(s)"; exit 1; fi + echo "SAST: passed" + + - name: AI attribution check + run: | + PATTERN="co-authored-by:.*(cursor|claude|anthropic|aider|copilot)" + FOUND=0 + for commit in $(git log origin/${{ github.base_ref }}..HEAD --format="%H" 2>/dev/null || echo ""); do + if git log -1 --format="%B" "$commit" | grep -qiE "$PATTERN"; then + echo "ERROR: AI attribution in $(git log -1 --format='%h %s' $commit)" + FOUND=$((FOUND + 1)) + fi + if git log -1 --format="%B" "$commit" | grep -q "Generated with Claude Code"; then + echo "ERROR: Claude Code marker in $(git log -1 --format='%h %s' $commit)" + FOUND=$((FOUND + 1)) + fi + done + if [ "$FOUND" -gt 0 ]; then exit 1; fi + echo "AI attribution check: passed" + - name: Typecheck run: bun turbo typecheck diff --git a/.husky/commit-msg b/.husky/commit-msg index a7adb1935..b3612deb6 100755 --- a/.husky/commit-msg +++ b/.husky/commit-msg @@ -4,7 +4,6 @@ set -e msg=$(head -1 "$1") # Conventional commit format: type(scope): description -# or: type: description if ! echo "$msg" | grep -qE '^(feat|fix|docs|chore|refactor|test|perf|ci|build|revert)(\([a-zA-Z0-9_-]+\))?!?: .+'; then echo "ERROR: Commit message does not follow conventional commit format." echo "" @@ -16,13 +15,26 @@ if ! echo "$msg" | grep -qE '^(feat|fix|docs|chore|refactor|test|perf|ci|build|r exit 1 fi -# Strip AI co-authorship trailers from commit message -# These are injected by AI coding tools and misrepresent authorship. -# See docs/AI_ATTRIBUTION_POLICY.md for the full list. +# Strip AI co-authorship trailers from commit message. +# Only 4 tools actually inject (verified from docs/source): +# Cursor: Co-authored-by: Cursor +# Claude: Co-Authored-By: Claude +# Aider: Co-authored-by: aider (MODEL) +# Copilot: (commits as author, but strip if trailer found) +# Also strips Claude Code body marker. +# See docs/AI_ATTRIBUTION_POLICY.md if [ "$(uname)" = "Darwin" ]; then - sed -i '' '/^Co-authored-by:.*[Cc]laude\|[Cc]opilot\|[Cc]ursor\|[Cc]hat[Gg][Pp][Tt]\|[Oo]pen[Aa][Ii]\|[Aa]nthrop\|[Dd]evin\|[Aa]ider\|[Cc]odeium\|[Tt]abnine\|[Ww]indsurf\|[Cc]ody\|[Cc]ontinue\|[Aa]ugment\|[Ss]upermaven/d' "$1" - sed -i '' '/^Authored-by:.*AI\|^Written-by:.*AI\|^Generated-by:/d' "$1" + sed -i '' '/^[Cc]o-[Aa]uthored-[Bb]y:.*[Cc]ursor\|cursoragent@cursor/d' "$1" + sed -i '' '/^[Cc]o-[Aa]uthored-[Bb]y:.*[Cc]laude\|noreply@anthropic/d' "$1" + sed -i '' '/^[Cc]o-[Aa]uthored-[Bb]y:.*aider\|noreply@aider/d' "$1" + sed -i '' '/^[Cc]o-[Aa]uthored-[Bb]y:.*[Cc]opilot/d' "$1" + sed -i '' '/Generated with Claude Code/d' "$1" + sed -i '' '/Generated with \[Claude Code\]/d' "$1" else - sed -i '/^Co-authored-by:.*[Cc]laude\|[Cc]opilot\|[Cc]ursor\|[Cc]hat[Gg][Pp][Tt]\|[Oo]pen[Aa][Ii]\|[Aa]nthrop\|[Dd]evin\|[Aa]ider\|[Cc]odeium\|[Tt]abnine\|[Ww]indsurf\|[Cc]ody\|[Cc]ontinue\|[Aa]ugment\|[Ss]upermaven/d' "$1" - sed -i '/^Authored-by:.*AI\|^Written-by:.*AI\|^Generated-by:/d' "$1" + sed -i '/^[Cc]o-[Aa]uthored-[Bb]y:.*[Cc]ursor\|cursoragent@cursor/d' "$1" + sed -i '/^[Cc]o-[Aa]uthored-[Bb]y:.*[Cc]laude\|noreply@anthropic/d' "$1" + sed -i '/^[Cc]o-[Aa]uthored-[Bb]y:.*aider\|noreply@aider/d' "$1" + sed -i '/^[Cc]o-[Aa]uthored-[Bb]y:.*[Cc]opilot/d' "$1" + sed -i '/Generated with Claude Code/d' "$1" + sed -i '/Generated with \[Claude Code\]/d' "$1" fi diff --git a/.husky/pre-commit b/.husky/pre-commit index 91499d2bc..ed4eee363 100755 --- a/.husky/pre-commit +++ b/.husky/pre-commit @@ -5,9 +5,6 @@ set -e bun prettier --write "packages/opencode/src/**/*.ts" "packages/plugin/src/**/*.ts" git update-index --again -# Strip AI tool attribution from staged files -scripts/strip-ai-attribution.sh - # SAST: check for security anti-patterns in staged files scripts/sast-check.sh diff --git a/.husky/pre-push b/.husky/pre-push index 746ecf791..97aeb3ba6 100755 --- a/.husky/pre-push +++ b/.husky/pre-push @@ -2,7 +2,6 @@ set -e # Check if bun version matches package.json -# keep in sync with packages/script/src/index.ts semver qualifier bun -e ' import { semver } from "bun"; const pkg = await Bun.file("package.json").json(); @@ -19,30 +18,44 @@ if (process.versions.bun !== expectedBunVersion) { } ' -# Verify no AI attribution in commits being pushed -# Checks all commits between upstream tracking branch and HEAD +# Verify no AI attribution in commits being pushed. +# Real patterns from tools that actually inject: +# Cursor: Co-authored-by: Cursor +# Claude: Co-Authored-By: Claude +# Aider: Co-authored-by: aider (...) commits=$(git log @{u}..HEAD --format="%H" 2>/dev/null || echo "") for commit in $commits; do msg=$(git log -1 --format="%B" "$commit") - if echo "$msg" | grep -qiE "co-authored-by:.*(claude|copilot|cursor|chatgpt|openai|anthropic|devin|aider|codeium|tabnine|windsurf|cody|continue|augment|supermaven)"; then + if echo "$msg" | grep -qiE "co-authored-by:.*(cursor|claude|anthropic|aider|copilot)"; then short=$(git log -1 --format="%h %s" "$commit") echo "" echo "ERROR: AI co-authorship attribution found in commit:" echo " $short" - echo "" - echo "Strip it with: git rebase -i HEAD~N (edit the commit message)" - echo "See docs/AI_ATTRIBUTION_POLICY.md for details." + echo "Strip with: git rebase -i HEAD~N" + echo "See docs/AI_ATTRIBUTION_POLICY.md" + exit 1 + fi + if echo "$msg" | grep -q "Generated with Claude Code"; then + short=$(git log -1 --format="%h %s" "$commit") + echo "ERROR: Claude Code marker in commit: $short" exit 1 fi done -# Strip AI attribution from existing PR body (if gh is available and PR exists) +# Strip AI attribution from PR body (if gh available and PR exists) if command -v gh >/dev/null 2>&1; then branch=$(git rev-parse --abbrev-ref HEAD) pr_number=$(gh pr view "$branch" --json number --jq '.number' 2>/dev/null || echo "") if [ -n "$pr_number" ]; then body=$(gh pr view "$pr_number" --json body --jq '.body' 2>/dev/null || echo "") - cleaned=$(echo "$body" | sed -E '/[Cc]o-authored-by:.*(Claude|Copilot|Cursor|ChatGPT|OpenAI|Anthropic|Devin|Aider|Codeium|Tabnine|Windsurf|Cody|Continue|Augment|Supermaven)/Id; /Created with Cursor/Id; /Generated by (Claude|Copilot|ChatGPT|Codeium|Windsurf|Devin|Aider)/Id; /AI-generated/Id') + cleaned=$(echo "$body" | sed -E \ + -e '/[Cc]o-[Aa]uthored-[Bb]y:.*(Cursor|Claude|aider|Copilot|anthropic)/Id' \ + -e '/cursoragent@cursor\.com/d' \ + -e '/noreply@anthropic\.com/d' \ + -e '/noreply@aider\.chat/d' \ + -e '/Generated with Claude Code/d' \ + -e '/Generated with \[Claude Code\]/d' \ + ) if [ "$body" != "$cleaned" ]; then echo "$cleaned" | gh pr edit "$pr_number" --body-file - 2>/dev/null echo "Stripped AI attribution from PR #$pr_number body" diff --git a/BUGS.md b/BUGS.md index b1718e26f..d6b393989 100644 --- a/BUGS.md +++ b/BUGS.md @@ -19,6 +19,38 @@ All bugs tracked here. Do not create per-package bug files. | S4 | Server unauthenticated on non-loopback | Med | Server throws if bound to non-loopback without `OPENCODE_SERVER_PASSWORD` | | S5 | Read tool exposes .env files | Med | Sensitive file deny-list; `always: []` for sensitive files forces permission prompt | +## Open — Bugs (0) + +_No open bugs._ + +## Fixed — Bugs (QA deep dive, PR #32) + +| # | Issue | Sev | Fix | +| --- | ----- | --- | --- | +| B53 | `CAS.deleteBySession()` race condition | High | Wrapped SELECT + DELETE in `Database.transaction()` | +| B54 | `CAS.deleteOrphans()` deletes shared CAS entries | High | Added EditGraphNode reference check before deleting | +| B55 | `EditGraph.checkout()` inconsistent on partial failure | High | Wrapped undo loop + head update in `Database.transaction()` | +| B56 | `EditGraph.deleteBySession()` not atomic | Med | Wrapped in `Database.transaction()` | +| B57 | `filterEdited()` synthetic placeholder reuses part ID | Med | Changed to `PartID.ascending()` for unique synthetic ID | + +## Open — Edge Cases (1) + +| # | Issue | Sev | Location | Notes | +| --- | ----- | --- | -------- | ----- | +| E1 | `sweep()` clock skew: `turnWhenSet > currentTurn` | Low | `context-edit/index.ts:622-625` | Negative elapsed → never sweeps. Only possible from a bug upstream — turn counter is monotonic. | + +## False Positives — Edge Cases (5) + +Investigated and determined to be correct behavior or non-issues. + +| Issue | Verdict | +|-------|---------| +| E2: `EditGraph.getHead()` returns undefined vs null | **Correct** — `undefined` is standard TS for "not present"; all callers use `!head` which handles both | +| E3: First commit creates self-referential branch | **Intentional** — `branches: { main: nodeID }` is standard DAG initialization; "main" → first node is correct | +| E4: `Objective.extract()` concurrent race | **False positive** — prompt loop serializes calls per session; concurrency cannot occur | +| E5: `SideThread.create()` duplicate ID not caught | **Correct** — `Identifier.ascending()` is unique (timestamp+counter+random); DB error on collision is the right behavior (fail loudly) | +| E6: SHA-256 collision in CAS not detected | **Intentional** — SHA-256 has no known collisions; `onConflictDoNothing()` was explicitly chosen (B43 fix) | + ## Open — Code Quality (5) Found during QA bug hunt (static analysis). Not crashes, but code quality issues. diff --git a/WHAT_WE_DID.md b/WHAT_WE_DID.md index 237737448..339674a5b 100644 --- a/WHAT_WE_DID.md +++ b/WHAT_WE_DID.md @@ -20,4 +20,4 @@ CAS, edit graph, context editing (6 ops), side threads, objective tracker, class - **#29:** Phase 5 tests: filterEdited (8), filterEphemeral (6), ContextEdit validation (10) — 24 new tests - **#30:** Phase 6 Effect analysis: all 12 upstream Effect PRs reviewed — zero need reimplementation - **#31:** QA bug hunt: 18 console.log→Log.create() fixes, 5 code quality issues documented (Q1-Q5) -- **#32:** Hardened git hooks: SAST checks, AI attribution stripping (pre-commit/commit-msg/pre-push), PR body cleaning +- **#32:** Hardened git hooks + QA: SAST, AI attribution stripping, CI fixes, tmux tests passed, 5 bugs fixed (B53-B57 — transaction safety, shared CAS, synthetic ID), 5 false positives documented diff --git a/packages/opencode/src/cas/graph.ts b/packages/opencode/src/cas/graph.ts index 8c8e9dd76..a34c47429 100644 --- a/packages/opencode/src/cas/graph.ts +++ b/packages/opencode/src/cas/graph.ts @@ -225,35 +225,37 @@ export namespace EditGraph { nodesToUndo.push(node) } - // Undo nodes: restore parts from CAS - for (const node of nodesToUndo) { - if (!node.cas_hash) continue - const casEntry = CAS.get(node.cas_hash) - if (!casEntry) { - log.warn("CAS entry not found during checkout", { hash: node.cas_hash, nodeID: node.id }) - continue - } - - try { - const originalPart = JSON.parse(casEntry.content) as MessageV2.Part - // Restore the original part (remove edit metadata) - Session.updatePart({ - ...originalPart, - edit: undefined, - }) - } catch (e) { - log.warn("Failed to restore part during checkout", { nodeID: node.id, error: String(e) }) + // Undo nodes and update head atomically + Database.transaction(() => { + for (const node of nodesToUndo) { + if (!node.cas_hash) continue + const casEntry = CAS.get(node.cas_hash) + if (!casEntry) { + log.warn("CAS entry not found during checkout", { hash: node.cas_hash, nodeID: node.id }) + continue + } + + try { + const originalPart = JSON.parse(casEntry.content) as MessageV2.Part + // Restore the original part (remove edit metadata) + Session.updatePart({ + ...originalPart, + edit: undefined, + }) + } catch (e) { + log.warn("Failed to restore part during checkout", { nodeID: node.id, error: String(e) }) + } } - } - // Update head to target - Database.use((db) => { - db.update(EditGraphHeadTable) - .set({ node_id: targetNodeID }) - .where(eq(EditGraphHeadTable.session_id, sessionID)) - .run() + // Update head to target + Database.use((db) => { + db.update(EditGraphHeadTable) + .set({ node_id: targetNodeID }) + .where(eq(EditGraphHeadTable.session_id, sessionID)) + .run() - Database.effect(() => Bus.publish(Event.CheckedOut, { sessionID, nodeID: targetNodeID }, InstanceALS.directory)) + Database.effect(() => Bus.publish(Event.CheckedOut, { sessionID, nodeID: targetNodeID }, InstanceALS.directory)) + }) }) log.info("checked out", { sessionID, targetNodeID, undone: nodesToUndo.length }) @@ -317,21 +319,21 @@ export namespace EditGraph { } export function deleteBySession(sessionID: string): number { - const nodes = Database.use((db) => - db + let count = 0 + Database.transaction((db) => { + const nodes = db .select({ id: EditGraphNodeTable.id }) .from(EditGraphNodeTable) .where(eq(EditGraphNodeTable.session_id, sessionID)) - .all(), - ) - Database.use((db) => { + .all() db.delete(EditGraphHeadTable).where(eq(EditGraphHeadTable.session_id, sessionID)).run() db.delete(EditGraphNodeTable).where(eq(EditGraphNodeTable.session_id, sessionID)).run() + count = nodes.length }) - if (nodes.length > 0) { - log.info("deleted by session", { sessionID: sessionID.slice(0, 12), nodes: nodes.length }) + if (count > 0) { + log.info("deleted by session", { sessionID: sessionID.slice(0, 12), nodes: count }) } - return nodes.length + return count } function buildPathToRoot(nodeID: string, nodeMap?: Map): Node[] { diff --git a/packages/opencode/src/cas/index.ts b/packages/opencode/src/cas/index.ts index 7ed9be7f4..213cfd82f 100644 --- a/packages/opencode/src/cas/index.ts +++ b/packages/opencode/src/cas/index.ts @@ -1,6 +1,6 @@ import { createHash } from "crypto" import { Database, eq, and, lt, isNull, not, inArray, sql } from "@/storage/db" -import { CASObjectTable } from "./cas.sql" +import { CASObjectTable, EditGraphNodeTable } from "./cas.sql" import { SessionTable } from "@/session/session.sql" import { Token } from "@/util/token" import { Log } from "@/util/log" @@ -85,18 +85,21 @@ export namespace CAS { * Returns the number of entries deleted. */ export function deleteBySession(sessionID: string): number { - const entries = Database.use((db) => - db + let count = 0 + Database.transaction((db) => { + const entries = db .select({ hash: CASObjectTable.hash }) .from(CASObjectTable) .where(eq(CASObjectTable.session_id, sessionID)) - .all(), - ) - if (entries.length === 0) return 0 - - Database.use((db) => db.delete(CASObjectTable).where(eq(CASObjectTable.session_id, sessionID)).run()) - log.info("deleted by session", { sessionID: sessionID.slice(0, 12), count: entries.length }) - return entries.length + .all() + if (entries.length === 0) return + db.delete(CASObjectTable).where(eq(CASObjectTable.session_id, sessionID)).run() + count = entries.length + }) + if (count > 0) { + log.info("deleted by session", { sessionID: sessionID.slice(0, 12), count }) + } + return count } /** @@ -116,10 +119,30 @@ export namespace CAS { .all(), ) if (nullSessionEntries.length > 0) { - const hashes = nullSessionEntries.map((e) => e.hash) - Database.use((db) => db.delete(CASObjectTable).where(inArray(CASObjectTable.hash, hashes)).run()) - totalDeleted += nullSessionEntries.length - log.info("deleted orphans (null session)", { count: nullSessionEntries.length, olderThanDays }) + // Only delete CAS entries not referenced by any EditGraphNode + const referencedHashes = new Set( + Database.use((db) => + db + .select({ cas_hash: EditGraphNodeTable.cas_hash }) + .from(EditGraphNodeTable) + .where( + inArray( + EditGraphNodeTable.cas_hash, + nullSessionEntries.map((e) => e.hash), + ), + ) + .all(), + ) + .map((r) => r.cas_hash) + .filter(Boolean), + ) + const safeToDel = nullSessionEntries.filter((e) => !referencedHashes.has(e.hash)) + if (safeToDel.length > 0) { + const hashes = safeToDel.map((e) => e.hash) + Database.use((db) => db.delete(CASObjectTable).where(inArray(CASObjectTable.hash, hashes)).run()) + totalDeleted += safeToDel.length + } + log.info("deleted orphans (null session)", { count: safeToDel.length, olderThanDays }) } // 2. Delete entries referencing non-existent sessions (older than cutoff) @@ -142,15 +165,31 @@ export namespace CAS { // Find entries with non-existent sessions const orphans = entriesWithSession.filter((e) => !existingSessions.has(e.session_id!)) if (orphans.length > 0) { - const hashes = orphans.map((e) => e.hash) - // Delete in batches of 100 to avoid SQL parameter limits + // Only delete CAS entries not referenced by any EditGraphNode (across all sessions) + const orphanHashes = orphans.map((e) => e.hash) + const referencedOrphanHashes = new Set() const batchSize = 100 - for (let i = 0; i < hashes.length; i += batchSize) { - const batch = hashes.slice(i, i + batchSize) + for (let i = 0; i < orphanHashes.length; i += batchSize) { + const batch = orphanHashes.slice(i, i + batchSize) + const refs = Database.use((db) => + db + .select({ cas_hash: EditGraphNodeTable.cas_hash }) + .from(EditGraphNodeTable) + .where(inArray(EditGraphNodeTable.cas_hash, batch)) + .all(), + ) + for (const r of refs) { + if (r.cas_hash) referencedOrphanHashes.add(r.cas_hash) + } + } + const safeToDel = orphanHashes.filter((h) => !referencedOrphanHashes.has(h)) + // Delete in batches of 100 to avoid SQL parameter limits + for (let i = 0; i < safeToDel.length; i += batchSize) { + const batch = safeToDel.slice(i, i + batchSize) Database.use((db) => db.delete(CASObjectTable).where(inArray(CASObjectTable.hash, batch)).run()) } - totalDeleted += orphans.length - log.info("deleted orphans (missing session)", { count: orphans.length, olderThanDays }) + totalDeleted += safeToDel.length + log.info("deleted orphans (missing session)", { count: safeToDel.length, olderThanDays }) } return totalDeleted diff --git a/packages/opencode/src/cli/cmd/debug/agent.ts b/packages/opencode/src/cli/cmd/debug/agent.ts index 5b927dda6..810b82f5f 100644 --- a/packages/opencode/src/cli/cmd/debug/agent.ts +++ b/packages/opencode/src/cli/cmd/debug/agent.ts @@ -96,7 +96,7 @@ function parseToolParams(input?: string) { return JSON.parse(trimmed) } catch (jsonError) { try { - return new Function(`return (${trimmed})`)() + return new Function(`return (${trimmed})`)() // sast-ignore — debug-only command, not user-facing } catch (evalError) { throw new Error( `Failed to parse --params. Use JSON or a JS object literal. JSON error: ${jsonError}. Eval error: ${evalError}.`, diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts index 4b3177d00..d5cf9bf96 100644 --- a/packages/opencode/src/session/message-v2.ts +++ b/packages/opencode/src/session/message-v2.ts @@ -1018,7 +1018,7 @@ export namespace MessageV2 { ...msg, parts: [ { - id: firstPart.id, + id: PartID.ascending(), sessionID: firstPart.sessionID, messageID: firstPart.messageID, type: "text",