Skip to content

Commit 4688eab

Browse files
Zorlinclaude
andcommitted
feat(api): add directory manifest support for folder uploads
Implements directory manifests for organizing multiple files into a tree: - New DirectoryCodec (0xCD04) for directory manifest CIDs - DirectoryManifest type with entries, totalSize, filesCount - Two-phase upload: files uploaded individually, then /directory endpoint finalizes them into a tree structure with POST JSON body - HTML directory browsing with Archivist branding at /data/{cid} - JSON directory listing when Accept header doesn't include text/html - Path resolution within directories via /data/{cid}/path?p=subdir/file - Auto-promotion of single-child root directories API changes: - POST /api/archivist/v1/directory - finalize directory from uploaded files - GET /api/archivist/v1/data/{cid} - now serves HTML for directories - Added MIME types: audio/mpeg, audio/flac, video/webm, etc. - Relaxed filename validation to allow paths like "Album/track.mp3" 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <[email protected]>
1 parent 49688fc commit 4688eab

File tree

8 files changed

+1370
-8
lines changed

8 files changed

+1370
-8
lines changed

archivist/archivisttypes.nim

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ const
4242
Pos2Bn128MrklCodec* = multiCodec("poseidon2-alt_bn_128-merkle-2kb")
4343

4444
ManifestCodec* = multiCodec("codex-manifest")
45+
DirectoryCodec* = MultiCodec(0xCD04) # codex-directory (not yet registered in libp2p). NOTE(review): 0xCD04 looks like the value the multicodec table assigns to codex-slot-root - confirm it does not collide with SlotRootCodec, otherwise isDirectory would also match slot-root CIDs
4546
DatasetRootCodec* = multiCodec("codex-root")
4647
BlockCodec* = multiCodec("codex-block")
4748
SlotRootCodec* = multiCodec("codex-slot-root")
@@ -51,8 +52,8 @@ const
5152
HashesCodecs* = [Sha256HashCodec, Pos2Bn128SpngCodec, Pos2Bn128MrklCodec]
5253

5354
PrimitivesCodecs* = [
54-
ManifestCodec, DatasetRootCodec, BlockCodec, SlotRootCodec, SlotProvingRootCodec,
55-
SlotCellCodec,
55+
ManifestCodec, DirectoryCodec, DatasetRootCodec, BlockCodec, SlotRootCodec,
56+
SlotProvingRootCodec, SlotCellCodec,
5657
]
5758

5859
proc initEmptyCidTable(): ?!Table[(CidVersion, MultiCodec, MultiCodec), Cid] =

archivist/conf.nim

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -608,6 +608,16 @@ proc readValue*(
608608
) {.raises: [SerializationError, IOError].} =
609609
val = EthAddress.init(r.readValue(string)).get()
610610

611+
proc readValue*(
    r: var TomlReader, val: var Cid
) {.raises: [SerializationError, IOError].} =
  ## Reads a TOML string value and parses it as a CID into `val`.
  ##
  ## Raises `SerializationError` with the offending text when the string
  ## cannot be parsed as a CID.
  let text = r.readValue(string)
  let parsed = Cid.init(text)
  if parsed.isErr:
    raise newException(SerializationError, "Invalid CID: " & text)
  val = parsed.get()
620+
611621
proc readValue*(r: var TomlReader, val: var SignedPeerRecord) =
612622
without uri =? r.readValue(string).catch, err:
613623
error "invalid SignedPeerRecord configuration value", error = err.msg

archivist/directorynode.nim

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
## Copyright (c) 2025 Archivist Authors
2+
## Licensed under either of
3+
## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
4+
## * MIT license ([LICENSE-MIT](LICENSE-MIT))
5+
## at your option.
6+
## This file may not be copied, modified, or distributed except according to
7+
## those terms.
8+
9+
## Directory operations for ArchivistNode
10+
##
11+
## This module is kept separate from node.nim to avoid importing minprotobuf
12+
## into the main compilation path, which triggers serialization conflicts
13+
## with TOML config loading.
14+
15+
{.push raises: [].}
16+
17+
import pkg/chronos
18+
import pkg/questionable
19+
import pkg/questionable/results
20+
import pkg/libp2p/cid
21+
22+
# Import minprotobuf explicitly to avoid leaking symbols that conflict with serialization
23+
from pkg/libp2p/protobuf/minprotobuf import
  ProtoBuffer, initProtoBuffer, getField, getRepeatedField,
  getRequiredRepeatedField, write, finish, ProtoResult
26+
27+
import ./manifest/directory
28+
import ./blocktype as bt
29+
import ./stores
30+
import ./archivisttypes
31+
import ./units
32+
import ./errors
33+
import ./logutils
34+
35+
logScope:
36+
topics = "archivist directorynode"
37+
38+
proc decodeDirectoryManifest*(blk: bt.Block): ?!DirectoryManifest =
  ## Decode a directory manifest from a block.
  ##
  ## Fails when the block's CID does not carry `DirectoryCodec`, when a
  ## protobuf field is malformed, or when an entry CID cannot be parsed.
  ## A payload without any `entries` field decodes to an empty directory,
  ## which is what `encodeDirectoryManifest` emits for zero entries.
  ##
  ## Wire layout read here:
  ##   manifest: 1 = entries (repeated, embedded), 2 = totalSize, 3 = name
  ##   entry:    1 = name, 2 = cid bytes, 3 = size, 4 = isDirectory, 5 = mimetype
  if not ?blk.cid.isDirectory:
    return failure "Cid not a directory codec"

  var
    pbNode = initProtoBuffer(blk.data)
    pbEntries: seq[seq[byte]]
    totalSize: uint64
    name: string
    entries: seq[DirectoryEntry]

  # `entries` is a repeated field and the encoder writes nothing for an empty
  # directory, so its absence must not be treated as a decoding error. Only a
  # malformed buffer is rejected here.
  if pbNode.getRepeatedField(1, pbEntries).isErr:
    return failure("Unable to decode `entries` from directory manifest!")

  if pbNode.getField(2, totalSize).isErr:
    return failure("Unable to decode `totalSize` from directory manifest!")

  if pbNode.getField(3, name).isErr:
    return failure("Unable to decode `name` from directory manifest!")

  for pbEntryData in pbEntries:
    var
      pbEntry = initProtoBuffer(pbEntryData)
      entryName: string
      cidBuf: seq[byte]
      size: uint64
      isDir: uint32
      mimetype: string

    if pbEntry.getField(1, entryName).isErr:
      return failure("Unable to decode entry `name` from directory manifest!")

    if pbEntry.getField(2, cidBuf).isErr:
      return failure("Unable to decode entry `cid` from directory manifest!")

    if pbEntry.getField(3, size).isErr:
      return failure("Unable to decode entry `size` from directory manifest!")

    if pbEntry.getField(4, isDir).isErr:
      return failure("Unable to decode entry `isDirectory` from directory manifest!")

    if pbEntry.getField(5, mimetype).isErr:
      return failure("Unable to decode entry `mimetype` from directory manifest!")

    # An absent or invalid CID buffer surfaces here as a failure.
    let entryCid = ?Cid.init(cidBuf).mapFailure

    entries.add(
      DirectoryEntry(
        name: entryName,
        cid: entryCid,
        size: size.NBytes,
        isDirectory: isDir != 0, # stored on the wire as an integer flag
        mimetype: mimetype,
      )
    )

  success DirectoryManifest(
    entries: entries,
    totalSize: totalSize.NBytes,
    name: name,
  )
99+
100+
proc encodeDirectoryManifest*(directory: DirectoryManifest): seq[byte] =
  ## Serialize a directory manifest to protobuf bytes.
  ##
  ## Each entry becomes an embedded message in field 1 (name, cid bytes,
  ## size, isDirectory flag, optional mimetype); the total size goes into
  ## field 2 and the optional directory name into field 3. Empty `mimetype`
  ## and `name` strings are omitted from the output.
  var outer = initProtoBuffer()

  for item in directory.entries:
    var inner = initProtoBuffer()
    inner.write(1, item.name)
    inner.write(2, item.cid.data.buffer)
    inner.write(3, uint64(item.size))
    inner.write(4, uint32(item.isDirectory))
    if item.mimetype.len > 0:
      inner.write(5, item.mimetype)
    inner.finish()
    outer.write(1, inner)

  outer.write(2, uint64(directory.totalSize))

  if directory.name.len > 0:
    outer.write(3, directory.name)

  outer.finish()
  return outer.buffer
123+
124+
proc storeDirectoryManifest*(
    networkStore: NetworkStore, directory: DirectoryManifest
): Future[?!bt.Block] {.async: (raises: [CancelledError]).} =
  ## Encode `directory`, wrap the bytes in a `DirectoryCodec` block and put
  ## that block into `networkStore`.
  ##
  ## Returns the stored block, or the failure reported by block creation or
  ## by the store.
  let payload = encodeDirectoryManifest(directory)

  without manifestBlk =? bt.Block.new(data = payload, codec = DirectoryCodec), error:
    trace "Unable to create block from directory manifest"
    return failure(error)

  let putOutcome = await networkStore.putBlock(manifestBlk)
  if putErr =? putOutcome.errorOption:
    trace "Unable to store directory manifest block",
      cid = manifestBlk.cid, err = putErr.msg
    return failure(putErr)

  info "Stored directory manifest",
    cid = manifestBlk.cid,
    entries = directory.entries.len,
    totalSize = directory.totalSize

  return success(manifestBlk)
145+
146+
proc fetchDirectoryManifest*(
    networkStore: NetworkStore, cid: Cid
): Future[?!DirectoryManifest] {.async: (raises: [CancelledError]).} =
  ## Fetch and decode a directory manifest block.
  ##
  ## Returns a failure, without querying the store, when the content type of
  ## `cid` cannot be read or is not the directory codec. Also fails when the
  ## block cannot be retrieved or its payload does not decode as a directory
  ## manifest.
  without isDir =? cid.isDirectory, err:
    return failure(
      "CID has invalid content type for directory manifest " & $cid & ": " & err.msg
    )

  # `isDirectory` succeeding only means the codec could be read; a `false`
  # result must be rejected too, otherwise a block is fetched over the
  # network only for decoding to refuse it afterwards.
  if not isDir:
    return failure("CID is not a directory manifest " & $cid)

  trace "Retrieving directory manifest for cid", cid

  without blk =? await networkStore.getBlock(BlockAddress.init(cid)), err:
    trace "Error retrieving directory manifest block", cid, err = err.msg
    return failure err

  trace "Decoding directory manifest for cid", cid

  without directory =? decodeDirectoryManifest(blk), err:
    trace "Unable to decode as directory manifest", err = err.msg
    return failure("Unable to decode as directory manifest")

  trace "Decoded directory manifest", cid, entries = directory.entries.len

  return directory.success

archivist/manifest/directory.nim

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
## Copyright (c) 2025 Archivist Authors
2+
## Licensed under either of
3+
## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
4+
## * MIT license ([LICENSE-MIT](LICENSE-MIT))
5+
## at your option.
6+
## This file may not be copied, modified, or distributed except according to
7+
## those terms.
8+
9+
# This module defines DirectoryManifest for folder/directory support
10+
11+
{.push raises: [].}
12+
13+
# Note: minprotobuf import is NOT included here - it triggers a serialization
14+
# error when imported into a module that gets pulled into TOML config loading.
15+
# The encode/decode procs that need protobuf are in coders.nim instead.
16+
17+
import pkg/libp2p/[cid, multihash, multicodec]
18+
import pkg/questionable/results
19+
20+
import ../errors
21+
import ../units
22+
import ../archivisttypes
23+
24+
type
  DirectoryEntry* = object
    ## A single named child listed in a directory manifest.
    name*: string
    cid*: Cid
    size*: NBytes
    isDirectory*: bool
    mimetype*: string ## Empty string = not set

  DirectoryManifest* = object
    ## A list of entries together with their aggregate size and an optional
    ## directory name.
    entries*: seq[DirectoryEntry]
    totalSize*: NBytes
    name*: string ## Empty string = not set
36+
37+
############################################################
38+
# Accessors
39+
############################################################
40+
41+
func entries*(self: DirectoryManifest): seq[DirectoryEntry] =
  ## The manifest's entry list, in stored order.
  result = self.entries
43+
44+
func totalSize*(self: DirectoryManifest): NBytes =
  ## The `totalSize` value recorded in the manifest.
  result = self.totalSize
46+
47+
func name*(self: DirectoryManifest): string =
  ## The directory name; an empty string means no name was set.
  result = self.name
49+
50+
func filesCount*(self: DirectoryManifest): int =
  ## Number of direct entries that are not flagged as directories.
  for item in self.entries:
    if not item.isDirectory:
      result.inc
56+
57+
func dirsCount*(self: DirectoryManifest): int =
  ## Number of direct entries that are flagged as directories.
  for item in self.entries:
    if item.isDirectory:
      result.inc
63+
64+
############################################################
65+
# Predicates
66+
############################################################
67+
68+
func isDirectory*(cid: Cid): ?!bool =
  ## True when the content type of `cid` equals `DirectoryCodec`.
  ## Propagates a failure when the content type cannot be read.
  let codec = ?cid.contentType().mapFailure(ArchivistError)
  success(DirectoryCodec == codec)
70+
71+
func isDirectory*(mc: MultiCodec): ?!bool =
  ## True when `mc` equals `DirectoryCodec`; always a success result.
  return success(mc == DirectoryCodec)
73+
74+
## Note: encode/decode procs are in coders.nim to avoid minprotobuf import
75+
## which triggers a serialization error when imported here.
76+
77+
############################################################
78+
# Constructors
79+
############################################################
80+
81+
func new*(
    T: type DirectoryManifest,
    entries: seq[DirectoryEntry] = @[],
    name: string = "",
): DirectoryManifest =
  ## Build a manifest from `entries`, setting `totalSize` to the sum of the
  ## entries' sizes. `name` is optional; an empty string means "not set".
  var byteSum = 0
  for item in entries:
    byteSum += item.size.int

  result = T(entries: entries, totalSize: NBytes(byteSum), name: name)
95+
96+
func new*(
    T: type DirectoryEntry,
    name: string,
    cid: Cid,
    size: NBytes,
    isDirectory: bool = false,
    mimetype: string = "",
): DirectoryEntry =
  ## Build a directory entry from its fields. Entries default to plain
  ## files (`isDirectory = false`) with no mimetype set.
  result = T(
    name: name, cid: cid, size: size, isDirectory: isDirectory, mimetype: mimetype
  )
111+
112+
############################################################
113+
# String representation
114+
############################################################
115+
116+
func `$`*(entry: DirectoryEntry): string =
  ## Human-readable rendering of an entry; the mimetype is shown only when
  ## it is non-empty.
  var text = "DirectoryEntry(name: "
  text.add(entry.name)
  text.add(", cid: ")
  text.add($entry.cid)
  text.add(", size: ")
  text.add($entry.size)
  text.add(", isDirectory: ")
  text.add($entry.isDirectory)
  if entry.mimetype.len > 0:
    text.add(", mimetype: ")
    text.add(entry.mimetype)
  text.add(")")
  text
124+
125+
func `$`*(self: DirectoryManifest): string =
  ## Short summary of a manifest: optional name, total size and the number
  ## of entries (the entries themselves are not listed).
  let namePart =
    if self.name.len > 0:
      "name: " & self.name & ", "
    else:
      ""
  let sizePart = "totalSize: " & $self.totalSize
  let countPart = ", entries: " & $self.entries.len & " items)"
  "DirectoryManifest(" & namePart & sizePart & countPart
131+
132+
############################################################
133+
# Equality
134+
############################################################
135+
136+
func `==`*(a, b: DirectoryEntry): bool =
  ## Field-by-field equality of two entries.
  if a.name != b.name:
    return false
  if a.cid != b.cid:
    return false
  if a.size != b.size:
    return false
  if a.isDirectory != b.isDirectory:
    return false
  a.mimetype == b.mimetype
142+
143+
func `==`*(a, b: DirectoryManifest): bool =
  ## Two manifests are equal when their entry lists (order included),
  ## total sizes and names all match.
  if a.entries != b.entries:
    return false
  if a.totalSize != b.totalSize:
    return false
  a.name == b.name
147+
148+
############################################################
149+
# Helpers
150+
############################################################
151+
152+
proc findEntry*(self: DirectoryManifest, name: string, foundEntry: var DirectoryEntry): bool =
  ## Look up the first entry whose name equals `name`.
  ##
  ## On a match, copies it into `foundEntry` and returns true. Otherwise
  ## returns false and leaves `foundEntry` untouched.
  for idx in 0 ..< self.entries.len:
    if self.entries[idx].name == name:
      foundEntry = self.entries[idx]
      result = true
      break
160+
161+
proc sortByName(entries: var seq[DirectoryEntry]) =
  ## In-place, stable insertion sort by ascending entry name.
  ##
  ## Hand-rolled on purpose: per the original author's note, importing
  ## std/algorithm triggers a serialization error during TOML config loading.
  for start in 1 ..< entries.len:
    var pos = start
    # Sink the element at `start` towards the front until its predecessor no
    # longer sorts strictly after it; equal names keep their relative order.
    while pos > 0 and entries[pos - 1].name > entries[pos].name:
      swap(entries[pos - 1], entries[pos])
      dec pos
171+
172+
proc sortedEntries*(self: DirectoryManifest): seq[DirectoryEntry] =
  ## Return a copy of the entries with directories first and files after,
  ## each group ordered by name.
  var
    folders: seq[DirectoryEntry]
    plainFiles: seq[DirectoryEntry]

  for item in self.entries:
    if item.isDirectory:
      folders.add(item)
    else:
      plainFiles.add(item)

  sortByName(folders)
  sortByName(plainFiles)

  folders & plainFiles

0 commit comments

Comments
 (0)