Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
201 changes: 178 additions & 23 deletions packages/opencode/src/snapshot/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,146 @@ export namespace Snapshot {
const diffFull = Effect.fnUntraced(function* (from: string, to: string) {
return yield* locked(
Effect.gen(function* () {
// One changed file parsed from a numstat line, before its contents are loaded.
type Row = {
// Path of the changed file, taken from the third tab-separated numstat column.
file: string
// Kind of change; rows without an entry in the `status` map default to "modified".
status: "added" | "deleted" | "modified"
// True when numstat reports "-" for both counts; such rows never load file text.
binary: boolean
// Added line count (0 for binary rows or when the count is not a finite number).
additions: number
// Deleted line count (0 for binary rows or when the count is not a finite number).
deletions: number
}

// One blob to request from `git cat-file --batch` for a given row.
type Ref = {
// Path of the row this blob belongs to; used as the key of the result map.
file: string
// Which half of the diff the blob fills in: `before` (from) or `after` (to).
side: "before" | "after"
// Object name written to the batch process stdin, in `<rev>:<path>` form.
ref: string
}

// Per-file loader: reads the text of one row with `git show`, spawning one
// process per side that is needed. Yields a `[before, after]` pair.
//  - binary rows   -> ["", ""] without running git
//  - added rows    -> only the `to` side is read
//  - deleted rows  -> only the `from` side is read
//  - anything else -> both sides are read, two at a time
const show = Effect.fnUntraced(function* (row: Row) {
  // Text of `row.file` as stored in the given revision.
  const read = (rev: string) =>
    git([...cfg, ...args(["show", `${rev}:${row.file}`])]).pipe(Effect.map((item) => item.text))

  if (row.binary) return ["", ""]

  switch (row.status) {
    case "added":
      return ["", yield* read(to)]
    case "deleted":
      return [yield* read(from), ""]
    default:
      return yield* Effect.all([read(from), read(to)], { concurrency: 2 })
  }
})

// Batch loader: resolves the text of every non-binary row through a single
// `git cat-file --batch` process instead of one `git show` per file side.
//
// Yields a map from file path to its `{ before, after }` text. Yields
// `undefined` whenever the batch cannot be run or its output cannot be parsed
// exactly, so the caller can fall back to loading each file individually.
// Every failure path logs the reason before giving up.
const load = Effect.fnUntraced(
  function* (rows: Row[]) {
    // Request only the sides that are read for each row: `after` for added
    // files, `before` for deleted files, both otherwise. Binary rows are skipped.
    const refs = rows.flatMap((row): Ref[] => {
      if (row.binary) return []
      const before: Ref = { file: row.file, side: "before", ref: `${from}:${row.file}` }
      const after: Ref = { file: row.file, side: "after", ref: `${to}:${row.file}` }
      if (row.status === "added") return [after]
      if (row.status === "deleted") return [before]
      return [before, after]
    })
    if (!refs.length) return new Map<string, { before: string; after: string }>()

    // One object name per stdin line; responses are consumed in request order.
    const proc = ChildProcess.make("git", [...cfg, ...args(["cat-file", "--batch"])], {
      cwd: state.directory,
      extendEnv: true,
      stdin: Stream.make(new TextEncoder().encode(refs.map((item) => item.ref).join("\n") + "\n")),
    })
    const handle = yield* spawner.spawn(proc)
    // Collect stdout and stderr concurrently before reading the exit code.
    const [out, err] = yield* Effect.all(
      [Stream.mkUint8Array(handle.stdout), Stream.mkString(Stream.decodeText(handle.stderr))],
      { concurrency: 2 },
    )
    const code = yield* handle.exitCode
    if (code !== 0) {
      log.info("git cat-file --batch failed during snapshot diff, falling back to per-file git show", {
        stderr: err,
        refs: refs.length,
      })
      return
    }

    // Logs why parsing was abandoned and produces the `undefined` fallback signal.
    const fail = (msg: string, extra?: Record<string, string>) => {
      log.info(msg, { ...extra, refs: refs.length })
      return undefined
    }

    const map = new Map<string, { before: string; after: string }>()
    const dec = new TextDecoder()
    const newline = 10
    let i = 0
    // Parse the default `git cat-file --batch` stream: one header line,
    // then exactly `size` bytes of blob content, then a trailing newline.
    for (const ref of refs) {
      const end = out.indexOf(newline, i)
      if (end === -1) {
        return fail(
          "git cat-file --batch returned a truncated header during snapshot diff, falling back to per-file git show",
        )
      }

      // `subarray` decodes from a view, avoiding a copy of the bytes.
      const head = dec.decode(out.subarray(i, end))
      i = end + 1
      const hit = map.get(ref.file) ?? { before: "", after: "" }
      // A "missing" response has no content section; that side stays empty.
      if (head.endsWith(" missing")) {
        map.set(ref.file, hit)
        continue
      }

      const match = head.match(/^[0-9a-f]+ blob (\d+)$/)
      if (!match) {
        return fail(
          "git cat-file --batch returned an unexpected header during snapshot diff, falling back to per-file git show",
          { head },
        )
      }

      // The content must be fully present and followed by its terminating newline.
      const size = Number(match[1])
      if (!Number.isInteger(size) || size < 0 || i + size >= out.length || out[i + size] !== newline) {
        return fail(
          "git cat-file --batch returned truncated content during snapshot diff, falling back to per-file git show",
          { head },
        )
      }

      const text = dec.decode(out.subarray(i, i + size))
      if (ref.side === "before") hit.before = text
      if (ref.side === "after") hit.after = text
      map.set(ref.file, hit)
      i += size + 1
    }

    // Every byte must be accounted for; leftovers mean the responses were misaligned.
    if (i !== out.length) {
      return fail(
        "git cat-file --batch returned trailing data during snapshot diff, falling back to per-file git show",
      )
    }

    return map
  },
  Effect.scoped,
  // Spawn and stream failures are still best-effort, but are logged instead of
  // being swallowed silently.
  Effect.catch((error) =>
    Effect.sync<Map<string, { before: string; after: string }> | undefined>(() => {
      log.info("git cat-file --batch could not be run during snapshot diff, falling back to per-file git show", {
        error: String(error),
      })
      return undefined
    }),
  ),
)

// Accumulates one entry per changed file, in the order rows are processed.
const result: Snapshot.FileDiff[] = []
// Change kind per file path; looked up when building rows, where files without
// an entry are treated as "modified". NOTE(review): populated outside this view.
const status = new Map<string, "added" | "deleted" | "modified">()

Expand All @@ -459,30 +599,45 @@ export namespace Snapshot {
},
)

for (const line of numstat.text.trim().split("\n")) {
if (!line) continue
const [adds, dels, file] = line.split("\t")
if (!file) continue
const binary = adds === "-" && dels === "-"
const [before, after] = binary
? ["", ""]
: yield* Effect.all(
[
git([...cfg, ...args(["show", `${from}:${file}`])]).pipe(Effect.map((item) => item.text)),
git([...cfg, ...args(["show", `${to}:${file}`])]).pipe(Effect.map((item) => item.text)),
],
{ concurrency: 2 },
)
const additions = binary ? 0 : parseInt(adds)
const deletions = binary ? 0 : parseInt(dels)
result.push({
file,
before,
after,
additions: Number.isFinite(additions) ? additions : 0,
deletions: Number.isFinite(deletions) ? deletions : 0,
status: status.get(file) ?? "modified",
// Turn the numstat text into one `Row` per changed file, preserving line order.
// Each non-empty line is split on tabs into additions, deletions and file path;
// lines without a path are dropped.
const rows = numstat.text
  .trim()
  .split("\n")
  .filter(Boolean)
  .flatMap((line) => {
    const [adds, dels, file] = line.split("\t")
    if (!file) return []
    // "-" in both count columns marks a binary file, which has no line counts.
    const binary = adds === "-" && dels === "-"
    // Always parse as base 10 rather than relying on parseInt's prefix detection.
    const additions = binary ? 0 : Number.parseInt(adds, 10)
    const deletions = binary ? 0 : Number.parseInt(dels, 10)
    return [
      {
        file,
        status: status.get(file) ?? "modified",
        binary,
        // Counts that fail to parse (NaN) are reported as 0.
        additions: Number.isFinite(additions) ? additions : 0,
        deletions: Number.isFinite(deletions) ? deletions : 0,
      } satisfies Row,
    ]
  })
// Maximum number of rows handed to `load` at a time.
const step = 100

// Keep batches bounded so a large diff does not buffer every blob at once.
for (let start = 0; start < rows.length; start += step) {
  const chunk = rows.slice(start, start + step)
  // `undefined` means the batch lookup was abandoned for this chunk.
  const loaded = yield* load(chunk)

  for (const row of chunk) {
    // Binary rows keep empty text on both sides.
    let before = ""
    let after = ""
    if (!row.binary) {
      if (loaded) {
        // Files absent from the batch result also stay empty.
        const hit = loaded.get(row.file)
        before = hit?.before ?? ""
        after = hit?.after ?? ""
      } else {
        // Batch unavailable: read this file individually.
        const shown = yield* show(row)
        before = shown[0]
        after = shown[1]
      }
    }

    result.push({
      file: row.file,
      before,
      after,
      additions: row.additions,
      deletions: row.deletions,
      status: row.status,
    })
  }
}

return result
Expand Down
92 changes: 92 additions & 0 deletions packages/opencode/test/snapshot/snapshot.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -982,6 +982,98 @@ test("diffFull with new file additions", async () => {
})
})

// Builds 60 modified, 60 deleted, 60 added and 60 binary files (240 diff rows)
// whose names interleave when sorted, then checks that diffFull reports the
// exact before/after text, status and counts for each of them. The text
// fixtures contain multi-byte characters and no trailing newline.
test("diffFull with a large interleaved mixed diff", async () => {
  await using tmp = await bootstrap()
  await Instance.provide({
    directory: tmp.path,
    fn: async () => {
      const ids = Array.from({ length: 60 }, (_, i) => String(i).padStart(3, "0"))

      // Expected contents, shared by the writers and the assertions below.
      const modBefore = (id: string) => `before-${id}-é\n🙂\nline`
      const modAfter = (id: string) => `after-${id}-é\n🚀\nline`
      const delBefore = (id: string) => `gone-${id}\n你好`
      const addAfter = (id: string) => `new-${id}\nこんにちは`

      // Absolute paths of the four files that belong to each id.
      const items = ids.map((id, i) => ({
        id,
        i,
        mod: fwd(tmp.path, "mix", `${id}-mod.txt`),
        del: fwd(tmp.path, "mix", `${id}-del.txt`),
        add: fwd(tmp.path, "mix", `${id}-add.txt`),
        bin: fwd(tmp.path, "mix", `${id}-bin.bin`),
      }))

      await $`mkdir -p ${tmp.path}/mix`.quiet()
      await Promise.all([
        ...items.map((item) => Filesystem.write(item.mod, modBefore(item.id))),
        ...items.map((item) => Filesystem.write(item.del, delBefore(item.id))),
        ...items.map((item) => Filesystem.write(item.bin, new Uint8Array([0, item.i, 255, item.i % 251]))),
      ])

      const before = await Snapshot.track()
      expect(before).toBeTruthy()

      await Promise.all([
        ...items.map((item) => Filesystem.write(item.mod, modAfter(item.id))),
        ...items.map((item) => Filesystem.write(item.add, addAfter(item.id))),
        ...items.map((item) => Filesystem.write(item.bin, new Uint8Array([9, item.i, 8, item.i % 251]))),
        ...items.map((item) => fs.rm(item.del)),
      ])

      const after = await Snapshot.track()
      expect(after).toBeTruthy()

      const diffs = await Snapshot.diffFull(before!, after!)
      expect(diffs).toHaveLength(ids.length * 4)

      // Diff entries are looked up by their path relative to the snapshot root.
      const byFile = new Map(diffs.map((item) => [item.file, item]))
      for (const id of ids) {
        const modified = byFile.get(fwd("mix", `${id}-mod.txt`))
        expect(modified).toBeDefined()
        expect(modified!.before).toBe(modBefore(id))
        expect(modified!.after).toBe(modAfter(id))
        expect(modified!.status).toBe("modified")

        const deleted = byFile.get(fwd("mix", `${id}-del.txt`))
        expect(deleted).toBeDefined()
        expect(deleted!.before).toBe(delBefore(id))
        expect(deleted!.after).toBe("")
        expect(deleted!.status).toBe("deleted")

        const added = byFile.get(fwd("mix", `${id}-add.txt`))
        expect(added).toBeDefined()
        expect(added!.before).toBe("")
        expect(added!.after).toBe(addAfter(id))
        expect(added!.status).toBe("added")

        // Binary files carry no text and no line counts.
        const binary = byFile.get(fwd("mix", `${id}-bin.bin`))
        expect(binary).toBeDefined()
        expect(binary!.before).toBe("")
        expect(binary!.after).toBe("")
        expect(binary!.additions).toBe(0)
        expect(binary!.deletions).toBe(0)
        expect(binary!.status).toBe("modified")
      }
    },
  })
})

// Modifies 140 zero-padded files and checks that diffFull returns them in
// ascending path order, so that results spanning more than one content-loading
// batch are still emitted in sequence.
test("diffFull preserves git diff order across batch boundaries", async () => {
  await using tmp = await bootstrap()
  await Instance.provide({
    directory: tmp.path,
    fn: async () => {
      const ids = Array.from({ length: 140 }, (_, i) => String(i).padStart(3, "0"))
      const expected = ids.map((id) => `order/${id}.txt`)

      // Writes `<label>-<id>` into every file under order/.
      const writeAll = (label: string) =>
        Promise.all(ids.map((id) => Filesystem.write(`${tmp.path}/order/${id}.txt`, `${label}-${id}`)))

      await $`mkdir -p ${tmp.path}/order`.quiet()
      await writeAll("before")

      const before = await Snapshot.track()
      expect(before).toBeTruthy()

      await writeAll("after")

      const after = await Snapshot.track()
      expect(after).toBeTruthy()

      const diffs = await Snapshot.diffFull(before!, after!)
      const files = diffs.map((item) => item.file)
      expect(files).toEqual(expected)
    },
  })
})

test("diffFull with file modifications", async () => {
await using tmp = await bootstrap()
await Instance.provide({
Expand Down
Loading