Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ The following emojis are used to highlight certain changes:
### Changed

- `chunker` refactored to reduce overall memory use by reducing heap fragmentation [#649](https://github.com/ipfs/boxo/pull/649)
- `gateway` deserialized responses will have `Last-Modified` set to value from optional UnixFS 1.5 modification time field (if present in DAG) and a matching `If-Modified-Since` will return `304 Not Modified` (UnixFS 1.5 files only) [#659](https://github.com/ipfs/boxo/pull/659)

### Removed

Expand Down
12 changes: 12 additions & 0 deletions gateway/backend_blocks.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,12 @@ func (bb *BlocksBackend) Get(ctx context.Context, path path.ImmutablePath, range
return md, nil, err
}

// Set modification time in ContentPathMetadata if found in dag-pb's optional mtime field (UnixFS 1.5)
mtime := f.ModTime()
if !mtime.IsZero() {
md.ModTime = mtime
}

if d, ok := f.(files.Directory); ok {
dir, err := uio.NewDirectoryFromNode(bb.dagService, nd)
if err != nil {
Expand Down Expand Up @@ -231,6 +237,12 @@ func (bb *BlocksBackend) Head(ctx context.Context, path path.ImmutablePath) (Con
return ContentPathMetadata{}, nil, err
}

// Set modification time in ContentPathMetadata if found in dag-pb's optional mtime field (UnixFS 1.5)
mtime := fileNode.ModTime()
if !mtime.IsZero() {
md.ModTime = mtime
}

sz, err := fileNode.Size()
if err != nil {
return ContentPathMetadata{}, nil, err
Expand Down
3 changes: 2 additions & 1 deletion gateway/gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,8 @@ type ContentPathMetadata struct {
PathSegmentRoots []cid.Cid
LastSegment path.ImmutablePath
LastSegmentRemainder []string
ContentType string // Only used for UnixFS requests
ContentType string // Only used for UnixFS requests
ModTime time.Time // Optional, non-zero values may be present in UnixFS 1.5 DAGs
}

// ByteRange describes a range request within a UnixFS file. "From" and "To" mostly
Expand Down
160 changes: 160 additions & 0 deletions gateway/gateway_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,60 @@ func TestHeaders(t *testing.T) {
test(dagCborResponseFormat, dagCborPath)
})

// We have UnixFS1.5 tests in TestHeadersUnixFSModeModTime, here we test default behavior (DAG without modtime)
t.Run("If-Modified-Since is noop against DAG without optional UnixFS 1.5 mtime", func(t *testing.T) {
test := func(responseFormat string, path string) {
t.Run(responseFormat, func(t *testing.T) {
// Make regular request and read Last-Modified
url := ts.URL + path
req := mustNewRequest(t, http.MethodGet, url, nil)
req.Header.Add("Accept", responseFormat)
res := mustDoWithoutRedirect(t, req)
_, err := io.Copy(io.Discard, res.Body)
require.NoError(t, err)
defer res.Body.Close()
require.Equal(t, http.StatusOK, res.StatusCode)
lastModified := res.Header.Get("Last-Modified")
require.Empty(t, lastModified)

// Make second request with If-Modified-Since far in past and expect normal response
req = mustNewRequest(t, http.MethodGet, url, nil)
req.Header.Add("Accept", responseFormat)
req.Header.Add("If-Modified-Since", "Mon, 13 Jun 2000 22:18:32 GMT")
res = mustDoWithoutRedirect(t, req)
_, err = io.Copy(io.Discard, res.Body)
require.NoError(t, err)
defer res.Body.Close()
require.Equal(t, http.StatusOK, res.StatusCode)
})
}

test("", dirPath)
test("text/html", dirPath)
test(carResponseFormat, dirPath)
test(rawResponseFormat, dirPath)
test(tarResponseFormat, dirPath)

test("", hamtFilePath)
test("text/html", hamtFilePath)
test(carResponseFormat, hamtFilePath)
test(rawResponseFormat, hamtFilePath)
test(tarResponseFormat, hamtFilePath)

test("", filePath)
test("text/html", filePath)
test(carResponseFormat, filePath)
test(rawResponseFormat, filePath)
test(tarResponseFormat, filePath)

test("", dagCborPath)
test("text/html", dagCborPath+"/")
test(carResponseFormat, dagCborPath)
test(rawResponseFormat, dagCborPath)
test(dagJsonResponseFormat, dagCborPath)
test(dagCborResponseFormat, dagCborPath)
})

t.Run("X-Ipfs-Roots contains expected values", func(t *testing.T) {
test := func(responseFormat string, path string, roots string) {
t.Run(responseFormat, func(t *testing.T) {
Expand Down Expand Up @@ -495,6 +549,112 @@ func TestHeaders(t *testing.T) {
})
}

// Testing a DAG with (optional) UnixFS1.5 modification time
func TestHeadersUnixFSModeModTime(t *testing.T) {
t.Parallel()

ts, _, root := newTestServerAndNode(t, "unixfs-dir-with-mode-mtime.car")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ℹ️ created with Kubo 0.30.0-rc1

var (
rootCID = root.String() // "bafybeidbcy4u6y55gsemlubd64zk53xoxs73ifd6rieejxcr7xy46mjvky"
filePath = "/ipfs/" + rootCID + "/file1"
dirPath = "/ipfs/" + rootCID + "/dir1/"
)

t.Run("If-Modified-Since matching UnixFS 1.5 modtime returns Not Modified", func(t *testing.T) {
test := func(responseFormat string, path string, entityType string, supported bool) {
t.Run(fmt.Sprintf("%s/%s support=%t", responseFormat, entityType, supported), func(t *testing.T) {
// Make regular request and read Last-Modified
url := ts.URL + path
req := mustNewRequest(t, http.MethodGet, url, nil)
req.Header.Add("Accept", responseFormat)
res := mustDoWithoutRedirect(t, req)
_, err := io.Copy(io.Discard, res.Body)
require.NoError(t, err)
defer res.Body.Close()
require.Equal(t, http.StatusOK, res.StatusCode)
lastModified := res.Header.Get("Last-Modified")
if supported {
assert.NotEmpty(t, lastModified)
} else {
assert.Empty(t, lastModified)
lastModified = "Mon, 13 Jun 2022 22:18:32 GMT" // manually set value for use in next steps
}

ifModifiedSinceTime, err := time.Parse(time.RFC1123, lastModified)
require.NoError(t, err)
oneHourBefore := ifModifiedSinceTime.Add(-1 * time.Hour).Truncate(time.Second)
oneHourAfter := ifModifiedSinceTime.Add(1 * time.Hour).Truncate(time.Second)
oneHourBeforeStr := oneHourBefore.Format(time.RFC1123)
oneHourAfterStr := oneHourAfter.Format(time.RFC1123)
lastModifiedStr := ifModifiedSinceTime.Format(time.RFC1123)

// Make second request with If-Modified-Since and value read from response to first request
req = mustNewRequest(t, http.MethodGet, url, nil)
req.Header.Add("Accept", responseFormat)
req.Header.Add("If-Modified-Since", lastModifiedStr)
res = mustDoWithoutRedirect(t, req)
_, err = io.Copy(io.Discard, res.Body)
require.NoError(t, err)
defer res.Body.Close()
if supported {
// 304 on exact match, can skip body
assert.Equal(t, http.StatusNotModified, res.StatusCode)
} else {
assert.Equal(t, http.StatusOK, res.StatusCode)
}

// Make third request with If-Modified-Since 1h before value read from response to first request
// and expect HTTP 200
req = mustNewRequest(t, http.MethodGet, url, nil)
req.Header.Add("Accept", responseFormat)
req.Header.Add("If-Modified-Since", oneHourBeforeStr)
res = mustDoWithoutRedirect(t, req)
_, err = io.Copy(io.Discard, res.Body)
require.NoError(t, err)
defer res.Body.Close()
// always return 200 with body because mtime from unixfs is after value from If-Modified-Since
// so it counts as an update
assert.Equal(t, http.StatusOK, res.StatusCode)

// Make third request with If-Modified-Since 1h after value read from response to first request
// and expect HTTP 200
req = mustNewRequest(t, http.MethodGet, url, nil)
req.Header.Add("Accept", responseFormat)
req.Header.Add("If-Modified-Since", oneHourAfterStr)
res = mustDoWithoutRedirect(t, req)
_, err = io.Copy(io.Discard, res.Body)
require.NoError(t, err)
defer res.Body.Close()
if supported {
// 304 because mtime from unixfs is before value from If-Modified-Since
// so no update, can skip body
assert.Equal(t, http.StatusNotModified, res.StatusCode)
} else {
assert.Equal(t, http.StatusOK, res.StatusCode)
}
})
}

file, dir := "file", "directory"
// supported on file-based web responses
test("", filePath, file, true)
test("text/html", filePath, file, true)

// not supported on other formats
// we may implement support for If-Modified-Since for below request types
// if users raise the need, but If-None-Match is way better
test(carResponseFormat, filePath, file, false)
test(rawResponseFormat, filePath, file, false)
test(tarResponseFormat, filePath, file, false)

test("", dirPath, dir, false)
test("text/html", dirPath, dir, false)
test(carResponseFormat, dirPath, dir, false)
test(rawResponseFormat, dirPath, dir, false)
test(tarResponseFormat, dirPath, dir, false)
})
}

func TestGoGetSupport(t *testing.T) {
ts, _, root := newTestServerAndNode(t, "fixtures.car")

Expand Down
79 changes: 75 additions & 4 deletions gateway/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,11 @@ func (i *handler) getOrHeadHandler(w http.ResponseWriter, r *http.Request) {
return
}

// Detect when If-Modified-Since HTTP header + UnixFS 1.5 allow returning HTTP 304 Not Modified.
if i.handleIfModifiedSince(w, r, rq) {
return
}

// Support custom response formats passed via ?format or Accept HTTP header
switch responseFormat {
case "", jsonResponseFormat, cborResponseFormat:
Expand Down Expand Up @@ -410,18 +415,25 @@ func addCacheControlHeaders(w http.ResponseWriter, r *http.Request, contentPath
}

if lastMod.IsZero() {
// Otherwise, we set Last-Modified to the current time to leverage caching heuristics
// If no lastMod, set Last-Modified to the current time to leverage caching heuristics
// built into modern browsers: https://github.com/ipfs/kubo/pull/8074#pullrequestreview-645196768
modtime = time.Now()
} else {
// set Last-Modified to a meaningful value e.g. one read from dag-pb (UnixFS 1.5, mtime field)
// or the last time DNSLink / IPNS Record was modified / resoved or cache
modtime = lastMod
}

} else {
w.Header().Set("Cache-Control", immutableCacheControl)
modtime = noModtime // disable Last-Modified

// TODO: consider setting Last-Modified if UnixFS V1.5 ever gets released
// with metadata: https://github.com/ipfs/kubo/issues/6920
if lastMod.IsZero() {
// (noop) skip Last-Modified on immutable response
modtime = noModtime
} else {
// set Last-Modified to value read from dag-pb (UnixFS 1.5, mtime field)
modtime = lastMod
}
}

return modtime
Expand Down Expand Up @@ -507,6 +519,21 @@ func setIpfsRootsHeader(w http.ResponseWriter, rq *requestData, md *ContentPathM
w.Header().Set("X-Ipfs-Roots", rootCidList)
}

// lastModifiedMatch returns true if we can respond with HTTP 304 Not Modified
// It compares If-Modified-Since with logical modification time read from DAG
// (e.g. UnixFS 1.5 modtime, if present)
func lastModifiedMatch(ifModifiedSinceHeader string, lastModified time.Time) bool {
if ifModifiedSinceHeader == "" || lastModified.IsZero() {
return false
}
ifModifiedSinceTime, err := time.Parse(time.RFC1123, ifModifiedSinceHeader)
if err != nil {
return false
}
// ignoring fractional seconds (as HTTP dates don't include fractional seconds)
return !lastModified.Truncate(time.Second).After(ifModifiedSinceTime)
}

// etagMatch evaluates if we can respond with HTTP 304 Not Modified
// It supports multiple weak and strong etags passed in If-None-Match string
// including the wildcard one.
Expand Down Expand Up @@ -745,6 +772,50 @@ func (i *handler) handleIfNoneMatch(w http.ResponseWriter, r *http.Request, rq *
return false
}

func (i *handler) handleIfModifiedSince(w http.ResponseWriter, r *http.Request, rq *requestData) bool {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ℹ️ this is based on what we already had in handleIfNoneMatch to support If-None-Match, which operates on Etags.

This one is executed only if If-Modified-Since is present in request, which usually is less popular than more deterministic If-None-Match.

Still, we add is here as it costs us nothing to avoid returning payload and returning HTTP 304 Not Modified instead.

// Detect when If-Modified-Since HTTP header allows returning HTTP 304 Not Modified
ifModifiedSince := r.Header.Get("If-Modified-Since")
if ifModifiedSince == "" {
return false
}

// Resolve path to be able to read pathMetadata.ModTime
pathMetadata, err := i.backend.ResolvePath(r.Context(), rq.immutablePath)
if err != nil {
var forwardedPath path.ImmutablePath
var continueProcessing bool
if isWebRequest(rq.responseFormat) {
forwardedPath, continueProcessing = i.handleWebRequestErrors(w, r, rq.mostlyResolvedPath(), rq.immutablePath, rq.contentPath, err, rq.logger)
if continueProcessing {
pathMetadata, err = i.backend.ResolvePath(r.Context(), forwardedPath)
}
}
if !continueProcessing || err != nil {
err = fmt.Errorf("failed to resolve %s: %w", debugStr(rq.contentPath.String()), err)
i.webError(w, r, err, http.StatusInternalServerError)
return true
}
}

// Currently we only care about optional mtime from UnixFS 1.5 (dag-pb)
// but other sources of this metadata could be added in the future
lastModified := pathMetadata.ModTime
if lastModifiedMatch(ifModifiedSince, lastModified) {
w.WriteHeader(http.StatusNotModified)
return true
}

// Check if the resolvedPath is an immutable path.
_, err = path.NewImmutablePath(pathMetadata.LastSegment)
if err != nil {
i.webError(w, r, err, http.StatusInternalServerError)
return true
}

rq.pathMetadata = &pathMetadata
return false
}

// check if request was for one of known explicit formats,
// or should use the default, implicit Web+UnixFS behaviors.
func isWebRequest(responseFormat string) bool {
Expand Down
8 changes: 8 additions & 0 deletions gateway/handler_defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,14 @@ func (i *handler) serveDefaults(ctx context.Context, w http.ResponseWriter, r *h

setIpfsRootsHeader(w, rq, &pathMetadata)

// On deserialized responses, we prefer Last-Modified from pathMetadata
// (mtime in UnixFS 1.5 DAG). This also applies to /ipns/, because value
// from dag-pb, if present, is more meaningful than lastMod inferred from
// namesys.
if !pathMetadata.ModTime.IsZero() {
rq.lastMod = pathMetadata.ModTime
}

resolvedPath := pathMetadata.LastSegment
switch mc.Code(resolvedPath.RootCid().Prefix().Codec) {
case mc.Json, mc.DagJson, mc.Cbor, mc.DagCbor:
Expand Down
Binary file added gateway/testdata/unixfs-dir-with-mode-mtime.car
Binary file not shown.