diff --git a/assets/dir-index-html/dir-index.html b/assets/dir-index-html/dir-index.html
index ec00da79880..49ac2bb1f1a 100644
--- a/assets/dir-index-html/dir-index.html
+++ b/assets/dir-index-html/dir-index.html
@@ -31,7 +31,7 @@
diff --git a/assets/dir-index-html/src/dir-index.html b/assets/dir-index-html/src/dir-index.html
index f3dfd632878..376c4cd7705 100644
--- a/assets/dir-index-html/src/dir-index.html
+++ b/assets/dir-index-html/src/dir-index.html
@@ -30,7 +30,7 @@
diff --git a/assets/dir-index-html/test/main.go b/assets/dir-index-html/test/main.go
index c02523a9f40..43b4a098101 100644
--- a/assets/dir-index-html/test/main.go
+++ b/assets/dir-index-html/test/main.go
@@ -12,14 +12,15 @@ const templateFile = "../dir-index.html"
// Copied from go-ipfs/core/corehttp/gateway_indexPage.go
type listingTemplateData struct {
- GatewayURL string
- DNSLink bool
- Listing []directoryItem
- Size string
- Path string
- Breadcrumbs []breadcrumb
- BackLink string
- Hash string
+ GatewayURL string
+ DNSLink bool
+ Listing []directoryItem
+ Size string
+ Path string
+ Breadcrumbs []breadcrumb
+ BackLink string
+ Hash string
+ FastDirIndexThreshold int
}
type directoryItem struct {
diff --git a/config/gateway.go b/config/gateway.go
index e85cda98688..486089b7123 100644
--- a/config/gateway.go
+++ b/config/gateway.go
@@ -53,7 +53,15 @@ type Gateway struct {
// }
PathPrefixes []string
- // FIXME: Not yet implemented
+ // FastDirIndexThreshold is the number of items in a directory at or above
+ // which the Gateway switches to a shallow, faster listing which only
+ // requires the root node. This allows for listing big directories fast,
+ // without the linear slowdown caused by reading size metadata from child
+ // nodes.
+ // Setting to 0 will enable fast listings for all directories.
+ FastDirIndexThreshold *OptionalInteger `json:",omitempty"`
+
+ // FIXME: Not yet implemented: https://github.com/ipfs/go-ipfs/issues/8059
APICommands []string
// NoFetch configures the gateway to _not_ fetch blocks in response to
diff --git a/core/coreapi/unixfs.go b/core/coreapi/unixfs.go
index 5d3d7e80e30..3fd1fff6126 100644
--- a/core/coreapi/unixfs.go
+++ b/core/coreapi/unixfs.go
@@ -302,7 +302,7 @@ func (api *UnixfsAPI) processLink(ctx context.Context, linkres ft.LinkResult, se
}
func (api *UnixfsAPI) lsFromLinksAsync(ctx context.Context, dir uio.Directory, settings *options.UnixfsLsSettings) (<-chan coreiface.DirEntry, error) {
- out := make(chan coreiface.DirEntry)
+ out := make(chan coreiface.DirEntry, uio.DefaultShardWidth)
go func() {
defer close(out)
diff --git a/core/corehttp/gateway.go b/core/corehttp/gateway.go
index 2e794b53ffc..a4ae5383179 100644
--- a/core/corehttp/gateway.go
+++ b/core/corehttp/gateway.go
@@ -16,9 +16,10 @@ import (
)
type GatewayConfig struct {
- Headers map[string][]string
- Writable bool
- PathPrefixes []string
+ Headers map[string][]string
+ Writable bool
+ PathPrefixes []string
+ FastDirIndexThreshold int
}
// A helper function to clean up a set of headers:
@@ -89,9 +90,10 @@ func GatewayOption(writable bool, paths ...string) ServeOption {
}, headers[ACEHeadersName]...))
var gateway http.Handler = newGatewayHandler(GatewayConfig{
- Headers: headers,
- Writable: writable,
- PathPrefixes: cfg.Gateway.PathPrefixes,
+ Headers: headers,
+ Writable: writable,
+ PathPrefixes: cfg.Gateway.PathPrefixes,
+ FastDirIndexThreshold: int(cfg.Gateway.FastDirIndexThreshold.WithDefault(100)),
}, api)
gateway = otelhttp.NewHandler(gateway, "Gateway.Request")
diff --git a/core/corehttp/gateway_handler_unixfs.go b/core/corehttp/gateway_handler_unixfs.go
index f91e2df3b37..b318a641a09 100644
--- a/core/corehttp/gateway_handler_unixfs.go
+++ b/core/corehttp/gateway_handler_unixfs.go
@@ -18,6 +18,7 @@ import (
func (i *gatewayHandler) serveUnixFS(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, logger *zap.SugaredLogger) {
ctx, span := tracing.Span(ctx, "Gateway", "ServeUnixFS", trace.WithAttributes(attribute.String("path", resolvedPath.String())))
defer span.End()
+
// Handling UnixFS
dr, err := i.api.Unixfs().Get(ctx, resolvedPath)
if err != nil {
@@ -39,6 +40,7 @@ func (i *gatewayHandler) serveUnixFS(ctx context.Context, w http.ResponseWriter,
internalWebError(w, fmt.Errorf("unsupported UnixFS type"))
return
}
+
logger.Debugw("serving unixfs directory", "path", contentPath)
i.serveDirectory(ctx, w, r, resolvedPath, contentPath, dir, begin, logger)
}
diff --git a/core/corehttp/gateway_handler_unixfs_dir.go b/core/corehttp/gateway_handler_unixfs_dir.go
index f462e52f8f6..3eda85e1094 100644
--- a/core/corehttp/gateway_handler_unixfs_dir.go
+++ b/core/corehttp/gateway_handler_unixfs_dir.go
@@ -15,6 +15,7 @@ import (
"github.com/ipfs/go-ipfs/tracing"
path "github.com/ipfs/go-path"
"github.com/ipfs/go-path/resolver"
+ options "github.com/ipfs/interface-go-ipfs-core/options"
ipath "github.com/ipfs/interface-go-ipfs-core/path"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
@@ -108,36 +109,53 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit
return
}
+ // Optimization 1:
+ // List children without fetching their root blocks (fast, but no size info)
+ results, err := i.api.Unixfs().Ls(ctx, resolvedPath, options.Unixfs.ResolveChildren(false))
+ if err != nil {
+ internalWebError(w, err)
+ return
+ }
+
// storage for directory listing
- var dirListing []directoryItem
- dirit := dir.Entries()
- for dirit.Next() {
- size := "?"
- if s, err := dirit.Node().Size(); err == nil {
- // Size may not be defined/supported. Continue anyways.
- size = humanize.Bytes(uint64(s))
- }
+ dirListing := make([]directoryItem, 0, len(results))
- resolved, err := i.api.ResolvePath(ctx, ipath.Join(resolvedPath, dirit.Name()))
- if err != nil {
- internalWebError(w, err)
+ for link := range results {
+ if link.Err != nil {
+ // Report the per-entry error; the outer err is nil at this point.
+ internalWebError(w, link.Err)
return
}
- hash := resolved.Cid().String()
-
- // See comment above where originalUrlPath is declared.
+ hash := link.Cid.String()
di := directoryItem{
- Size: size,
- Name: dirit.Name(),
- Path: gopath.Join(originalUrlPath, dirit.Name()),
+ Size: "", // no size because we did not fetch child nodes
+ Name: link.Name,
+ Path: gopath.Join(originalUrlPath, link.Name),
Hash: hash,
ShortHash: shortHash(hash),
}
dirListing = append(dirListing, di)
}
- if dirit.Err() != nil {
- internalWebError(w, dirit.Err())
- return
+
+ // Optimization 2: fetch sizes only for dirs below FastDirIndexThreshold
+ if len(dirListing) < i.config.FastDirIndexThreshold {
+ dirit := dir.Entries()
+ linkNo := 0
+ // Stop at the end of dirListing so a mismatch between both listings cannot index out of range.
+ for linkNo < len(dirListing) && dirit.Next() {
+ size := "?"
+ if s, err := dirit.Node().Size(); err == nil {
+ // Size may not be defined/supported. Continue anyways.
+ size = humanize.Bytes(uint64(s))
+ }
+ dirListing[linkNo].Size = size
+ linkNo++
+ }
+ // Surface iteration errors instead of serving a partially sized listing.
+ if err := dirit.Err(); err != nil {
+ internalWebError(w, err)
+ return
+ }
}
// construct the correct back link
@@ -186,14 +197,15 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit
// See comment above where originalUrlPath is declared.
tplData := listingTemplateData{
- GatewayURL: gwURL,
- DNSLink: dnslink,
- Listing: dirListing,
- Size: size,
- Path: contentPath.String(),
- Breadcrumbs: breadcrumbs(contentPath.String(), dnslink),
- BackLink: backLink,
- Hash: hash,
+ GatewayURL: gwURL,
+ DNSLink: dnslink,
+ Listing: dirListing,
+ Size: size,
+ Path: contentPath.String(),
+ Breadcrumbs: breadcrumbs(contentPath.String(), dnslink),
+ BackLink: backLink,
+ Hash: hash,
+ FastDirIndexThreshold: i.config.FastDirIndexThreshold,
}
logger.Debugw("request processed", "tplDataDNSLink", dnslink, "tplDataSize", size, "tplDataBackLink", backLink, "tplDataHash", hash)
diff --git a/core/corehttp/gateway_indexPage.go b/core/corehttp/gateway_indexPage.go
index fbea91649d3..6cc548cdc4d 100644
--- a/core/corehttp/gateway_indexPage.go
+++ b/core/corehttp/gateway_indexPage.go
@@ -12,14 +12,15 @@ import (
// structs for directory listing
type listingTemplateData struct {
- GatewayURL string
- DNSLink bool
- Listing []directoryItem
- Size string
- Path string
- Breadcrumbs []breadcrumb
- BackLink string
- Hash string
+ GatewayURL string
+ DNSLink bool
+ Listing []directoryItem
+ Size string
+ Path string
+ Breadcrumbs []breadcrumb
+ BackLink string
+ Hash string
+ FastDirIndexThreshold int
}
type directoryItem struct {
diff --git a/docs/config.md b/docs/config.md
index 519a7c10546..77f34abe400 100644
--- a/docs/config.md
+++ b/docs/config.md
@@ -51,6 +51,7 @@ config file at runtime.
- [`Gateway.NoDNSLink`](#gatewaynodnslink)
- [`Gateway.HTTPHeaders`](#gatewayhttpheaders)
- [`Gateway.RootRedirect`](#gatewayrootredirect)
+ - [`Gateway.FastDirIndexThreshold`](#gatewayfastdirindexthreshold)
- [`Gateway.Writable`](#gatewaywritable)
- [`Gateway.PathPrefixes`](#gatewaypathprefixes)
- [`Gateway.PublicGateways`](#gatewaypublicgateways)
@@ -648,6 +649,20 @@ Default: `""`
Type: `string` (url)
+### `Gateway.FastDirIndexThreshold`
+
+The number of items in a directory at or above which the Gateway switches
+to a shallow, faster listing which only requires the root node.
+
+This allows for fast listings of big directories, without the linear slowdown caused
+by reading size metadata from child nodes.
+
+Setting to 0 will enable fast listings for all directories.
+
+Default: `100`
+
+Type: `optionalInteger`
+
### `Gateway.Writable`
A boolean to configure whether the gateway is writeable or not.
diff --git a/test/sharness/lib/test-lib.sh b/test/sharness/lib/test-lib.sh
index 38f12a0250c..0757c323cf9 100644
--- a/test/sharness/lib/test-lib.sh
+++ b/test/sharness/lib/test-lib.sh
@@ -388,6 +388,22 @@ test_should_contain() {
fi
}
+# Fails if file "$2" contains a line matching grep pattern "$1", or if "$2"
+# cannot be read (so a missing file never passes the check vacuously).
+test_should_not_contain() {
+ test "$#" = 2 || error "bug in the test script: not 2 parameters to test_should_not_contain"
+ if ! test -r "$2"
+ then
+ echo "'$2' is missing or unreadable"
+ return 1
+ fi
+ if grep -q "$1" "$2"
+ then
+ echo "'$2' contains undesired value '$1'"
+ return 1
+ fi
+}
+
test_str_contains() {
find=$1
shift
diff --git a/test/sharness/t0115-gateway-dir-listing.sh b/test/sharness/t0115-gateway-dir-listing.sh
index 0fc86ed7904..91ab8afe1fa 100755
--- a/test/sharness/t0115-gateway-dir-listing.sh
+++ b/test/sharness/t0115-gateway-dir-listing.sh
@@ -28,7 +28,9 @@ test_expect_success "Add the test directory" '
echo "I am a txt file in confusing /ipfs dir" > rootDir/ipfs/file.txt &&
echo "I am a txt file in confusing /ipns dir" > rootDir/ipns/file.txt &&
DIR_CID=$(ipfs add -Qr --cid-version 1 rootDir) &&
- FILE_CID=$(ipfs files stat /ipfs/$DIR_CID/ą/ę/file-źł.txt | head -1)
+ FILE_CID=$(ipfs files stat --enc=json /ipfs/$DIR_CID/ą/ę/file-źł.txt | jq -r .Hash) &&
+ FILE_SIZE=$(ipfs files stat --enc=json /ipfs/$DIR_CID/ą/ę/file-źł.txt | jq -r .Size) &&
+ echo "$FILE_CID / $FILE_SIZE"
'
## ============================================================================
@@ -135,6 +137,28 @@ test_expect_success "dnslink gw: hash column should be a CID link to cid.ipfs.io
test_should_contain "" list_response
'
+## ============================================================================
+## Test dir listing of a big directory
+## ============================================================================
+
+test_expect_success "dir listing should resolve child sizes if under Gateway.FastDirIndexThreshold" '
+ curl -sD - http://127.0.0.1:$GWAY_PORT/ipfs/${DIR_CID}/ą/ę/ | tee list_response &&
+ test_should_contain "/ipfs/${FILE_CID}?filename" list_response &&
+ test_should_contain ">${FILE_SIZE} B" list_response
+'
+
+# force fast dir index for all responses
+ipfs config --json Gateway.FastDirIndexThreshold 0
+# restart daemon to apply config changes
+test_kill_ipfs_daemon
+test_launch_ipfs_daemon
+
+test_expect_success "dir listing should not resolve child sizes beyond Gateway.FastDirIndexThreshold" '
+ curl -sD - http://127.0.0.1:$GWAY_PORT/ipfs/${DIR_CID}/ą/ę/ | tee list_response &&
+ test_should_contain "/ipfs/${FILE_CID}?filename" list_response &&
+ test_should_not_contain ">${FILE_SIZE} B" list_response
+'
+
## ============================================================================
## End of tests, cleanup
## ============================================================================