diff --git a/assets/dir-index-html/dir-index.html b/assets/dir-index-html/dir-index.html index ec00da79880..49ac2bb1f1a 100644 --- a/assets/dir-index-html/dir-index.html +++ b/assets/dir-index-html/dir-index.html @@ -31,7 +31,7 @@
- +
diff --git a/assets/dir-index-html/src/dir-index.html b/assets/dir-index-html/src/dir-index.html index f3dfd632878..376c4cd7705 100644 --- a/assets/dir-index-html/src/dir-index.html +++ b/assets/dir-index-html/src/dir-index.html @@ -30,7 +30,7 @@
- +
diff --git a/assets/dir-index-html/test/main.go b/assets/dir-index-html/test/main.go index c02523a9f40..43b4a098101 100644 --- a/assets/dir-index-html/test/main.go +++ b/assets/dir-index-html/test/main.go @@ -12,14 +12,15 @@ const templateFile = "../dir-index.html" // Copied from go-ipfs/core/corehttp/gateway_indexPage.go type listingTemplateData struct { - GatewayURL string - DNSLink bool - Listing []directoryItem - Size string - Path string - Breadcrumbs []breadcrumb - BackLink string - Hash string + GatewayURL string + DNSLink bool + Listing []directoryItem + Size string + Path string + Breadcrumbs []breadcrumb + BackLink string + Hash string + FastDirIndexThreshold int } type directoryItem struct { diff --git a/config/gateway.go b/config/gateway.go index e85cda98688..486089b7123 100644 --- a/config/gateway.go +++ b/config/gateway.go @@ -53,7 +53,15 @@ type Gateway struct { // } PathPrefixes []string - // FIXME: Not yet implemented + // FastDirIndexThreshold is the maximum number of items in a directory + // before the Gateway switches to a shallow, faster listing which only + // requires the root node. This allows for listing big directories fast, + // without the linear slowdown caused by reading size metadata from child + // nodes. + // Setting to 0 will enable fast listings for all directories. 
+ FastDirIndexThreshold *OptionalInteger `json:",omitempty"` + + // FIXME: Not yet implemented: https://github.com/ipfs/go-ipfs/issues/8059 APICommands []string // NoFetch configures the gateway to _not_ fetch blocks in response to diff --git a/core/coreapi/unixfs.go b/core/coreapi/unixfs.go index 5d3d7e80e30..3fd1fff6126 100644 --- a/core/coreapi/unixfs.go +++ b/core/coreapi/unixfs.go @@ -302,7 +302,7 @@ func (api *UnixfsAPI) processLink(ctx context.Context, linkres ft.LinkResult, se } func (api *UnixfsAPI) lsFromLinksAsync(ctx context.Context, dir uio.Directory, settings *options.UnixfsLsSettings) (<-chan coreiface.DirEntry, error) { - out := make(chan coreiface.DirEntry) + out := make(chan coreiface.DirEntry, uio.DefaultShardWidth) go func() { defer close(out) diff --git a/core/corehttp/gateway.go b/core/corehttp/gateway.go index 2e794b53ffc..a4ae5383179 100644 --- a/core/corehttp/gateway.go +++ b/core/corehttp/gateway.go @@ -16,9 +16,10 @@ import ( ) type GatewayConfig struct { - Headers map[string][]string - Writable bool - PathPrefixes []string + Headers map[string][]string + Writable bool + PathPrefixes []string + FastDirIndexThreshold int } // A helper function to clean up a set of headers: @@ -89,9 +90,10 @@ func GatewayOption(writable bool, paths ...string) ServeOption { }, headers[ACEHeadersName]...)) var gateway http.Handler = newGatewayHandler(GatewayConfig{ - Headers: headers, - Writable: writable, - PathPrefixes: cfg.Gateway.PathPrefixes, + Headers: headers, + Writable: writable, + PathPrefixes: cfg.Gateway.PathPrefixes, + FastDirIndexThreshold: int(cfg.Gateway.FastDirIndexThreshold.WithDefault(100)), }, api) gateway = otelhttp.NewHandler(gateway, "Gateway.Request") diff --git a/core/corehttp/gateway_handler_unixfs.go b/core/corehttp/gateway_handler_unixfs.go index f91e2df3b37..b318a641a09 100644 --- a/core/corehttp/gateway_handler_unixfs.go +++ b/core/corehttp/gateway_handler_unixfs.go @@ -18,6 +18,7 @@ import ( func (i *gatewayHandler) 
serveUnixFS(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, logger *zap.SugaredLogger) { ctx, span := tracing.Span(ctx, "Gateway", "ServeUnixFS", trace.WithAttributes(attribute.String("path", resolvedPath.String()))) defer span.End() + // Handling UnixFS dr, err := i.api.Unixfs().Get(ctx, resolvedPath) if err != nil { @@ -39,6 +40,7 @@ func (i *gatewayHandler) serveUnixFS(ctx context.Context, w http.ResponseWriter, internalWebError(w, fmt.Errorf("unsupported UnixFS type")) return } + logger.Debugw("serving unixfs directory", "path", contentPath) i.serveDirectory(ctx, w, r, resolvedPath, contentPath, dir, begin, logger) } diff --git a/core/corehttp/gateway_handler_unixfs_dir.go b/core/corehttp/gateway_handler_unixfs_dir.go index f462e52f8f6..3eda85e1094 100644 --- a/core/corehttp/gateway_handler_unixfs_dir.go +++ b/core/corehttp/gateway_handler_unixfs_dir.go @@ -15,6 +15,7 @@ import ( "github.com/ipfs/go-ipfs/tracing" path "github.com/ipfs/go-path" "github.com/ipfs/go-path/resolver" + options "github.com/ipfs/interface-go-ipfs-core/options" ipath "github.com/ipfs/interface-go-ipfs-core/path" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" @@ -108,36 +109,46 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit return } + // Optimization 1: + // List children without fetching their root blocks (fast, but no size info) + results, err := i.api.Unixfs().Ls(ctx, resolvedPath, options.Unixfs.ResolveChildren(false)) + if err != nil { + internalWebError(w, err) + return + } + // storage for directory listing - var dirListing []directoryItem - dirit := dir.Entries() - for dirit.Next() { - size := "?" - if s, err := dirit.Node().Size(); err == nil { - // Size may not be defined/supported. Continue anyways. 
- size = humanize.Bytes(uint64(s)) - } + dirListing := make([]directoryItem, 0, len(results)) - resolved, err := i.api.ResolvePath(ctx, ipath.Join(resolvedPath, dirit.Name())) - if err != nil { - internalWebError(w, err) + for link := range results { + if link.Err != nil { // a streamed entry failed; report that entry's own error + internalWebError(w, link.Err) return } - hash := resolved.Cid().String() - - // See comment above where originalUrlPath is declared. + hash := link.Cid.String() di := directoryItem{ - Size: size, - Name: dirit.Name(), - Path: gopath.Join(originalUrlPath, dirit.Name()), + Size: "", // no size because we did not fetch child nodes + Name: link.Name, + Path: gopath.Join(originalUrlPath, link.Name), Hash: hash, ShortHash: shortHash(hash), } dirListing = append(dirListing, di) } - if dirit.Err() != nil { - internalWebError(w, dirit.Err()) - return + + // Optimization 2: fetch sizes only for dirs below FastDirIndexThreshold + if len(dirListing) < i.config.FastDirIndexThreshold { + dirit := dir.Entries() + linkNo := 0 + for dirit.Next() { + size := "?" + if s, err := dirit.Node().Size(); err == nil { + // Size may not be defined/supported. Continue anyways. + size = humanize.Bytes(uint64(s)) + } + dirListing[linkNo].Size = size + linkNo++ + } } // construct the correct back link @@ -186,14 +197,15 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit // See comment above where originalUrlPath is declared. 
tplData := listingTemplateData{ - GatewayURL: gwURL, - DNSLink: dnslink, - Listing: dirListing, - Size: size, - Path: contentPath.String(), - Breadcrumbs: breadcrumbs(contentPath.String(), dnslink), - BackLink: backLink, - Hash: hash, + GatewayURL: gwURL, + DNSLink: dnslink, + Listing: dirListing, + Size: size, + Path: contentPath.String(), + Breadcrumbs: breadcrumbs(contentPath.String(), dnslink), + BackLink: backLink, + Hash: hash, + FastDirIndexThreshold: i.config.FastDirIndexThreshold, } logger.Debugw("request processed", "tplDataDNSLink", dnslink, "tplDataSize", size, "tplDataBackLink", backLink, "tplDataHash", hash) diff --git a/core/corehttp/gateway_indexPage.go b/core/corehttp/gateway_indexPage.go index fbea91649d3..6cc548cdc4d 100644 --- a/core/corehttp/gateway_indexPage.go +++ b/core/corehttp/gateway_indexPage.go @@ -12,14 +12,15 @@ import ( // structs for directory listing type listingTemplateData struct { - GatewayURL string - DNSLink bool - Listing []directoryItem - Size string - Path string - Breadcrumbs []breadcrumb - BackLink string - Hash string + GatewayURL string + DNSLink bool + Listing []directoryItem + Size string + Path string + Breadcrumbs []breadcrumb + BackLink string + Hash string + FastDirIndexThreshold int } type directoryItem struct { diff --git a/docs/config.md b/docs/config.md index 519a7c10546..77f34abe400 100644 --- a/docs/config.md +++ b/docs/config.md @@ -51,6 +51,7 @@ config file at runtime. 
- [`Gateway.NoDNSLink`](#gatewaynodnslink) - [`Gateway.HTTPHeaders`](#gatewayhttpheaders) - [`Gateway.RootRedirect`](#gatewayrootredirect) + - [`Gateway.FastDirIndexThreshold`](#gatewayfastdirindexthreshold) - [`Gateway.Writable`](#gatewaywritable) - [`Gateway.PathPrefixes`](#gatewaypathprefixes) - [`Gateway.PublicGateways`](#gatewaypublicgateways) @@ -648,6 +649,20 @@ Default: `""` Type: `string` (url) +### `Gateway.FastDirIndexThreshold` + +The maximum number of items in a directory before the Gateway switches +to a shallow, faster listing which only requires the root node. + +This allows for fast listings of big directories, without the linear slowdown caused +by reading size metadata from child nodes. + +Setting to 0 will enable fast listings for all directories. + +Default: `100` + +Type: `optionalInteger` + ### `Gateway.Writable` A boolean to configure whether the gateway is writeable or not. diff --git a/test/sharness/lib/test-lib.sh b/test/sharness/lib/test-lib.sh index 38f12a0250c..0757c323cf9 100644 --- a/test/sharness/lib/test-lib.sh +++ b/test/sharness/lib/test-lib.sh @@ -388,6 +388,15 @@ test_should_contain() { fi } +test_should_not_contain() { + test "$#" = 2 || error "bug in the test script: not 2 parameters to test_should_not_contain" + if grep -q "$1" "$2" + then + echo "'$2' contains undesired value '$1'" + return 1 + fi +} + test_str_contains() { find=$1 shift diff --git a/test/sharness/t0115-gateway-dir-listing.sh b/test/sharness/t0115-gateway-dir-listing.sh index 0fc86ed7904..91ab8afe1fa 100755 --- a/test/sharness/t0115-gateway-dir-listing.sh +++ b/test/sharness/t0115-gateway-dir-listing.sh @@ -28,7 +28,9 @@ test_expect_success "Add the test directory" ' echo "I am a txt file in confusing /ipfs dir" > rootDir/ipfs/file.txt && echo "I am a txt file in confusing /ipns dir" > rootDir/ipns/file.txt && DIR_CID=$(ipfs add -Qr --cid-version 1 rootDir) && - FILE_CID=$(ipfs files stat /ipfs/$DIR_CID/ą/ę/file-źł.txt | head -1) + FILE_CID=$(ipfs files 
stat --enc=json /ipfs/$DIR_CID/ą/ę/file-źł.txt | jq -r .Hash) && + FILE_SIZE=$(ipfs files stat --enc=json /ipfs/$DIR_CID/ą/ę/file-źł.txt | jq -r .Size) && + echo "$FILE_CID / $FILE_SIZE" ' ## ============================================================================ @@ -135,6 +137,28 @@ test_expect_success "dnslink gw: hash column should be a CID link to cid.ipfs.io test_should_contain "" list_response ' +## ============================================================================ +## Test dir listing of a big directory +## ============================================================================ + +test_expect_success "dir listing should resolve child sizes if under Gateway.FastDirIndexThreshold" ' + curl -sD - http://127.0.0.1:$GWAY_PORT/ipfs/${DIR_CID}/ą/ę/ | tee list_response && + test_should_contain "/ipfs/${FILE_CID}?filename" list_response && + test_should_contain ">${FILE_SIZE} B" list_response +' + +# force fast dir index for all responses +ipfs config --json Gateway.FastDirIndexThreshold 0 +# restart daemon to apply config changes +test_kill_ipfs_daemon +test_launch_ipfs_daemon + +test_expect_success "dir listing should not resolve child sizes beyond Gateway.FastDirIndexThreshold" ' + curl -sD - http://127.0.0.1:$GWAY_PORT/ipfs/${DIR_CID}/ą/ę/ | tee list_response && + test_should_contain "/ipfs/${FILE_CID}?filename" list_response && + test_should_not_contain ">${FILE_SIZE} B" list_response +' + ## ============================================================================ ## End of tests, cleanup ## ============================================================================