From bff36031303f25e95647c1f51301a3f0f9d6b568 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 17 Nov 2025 07:26:10 +0000 Subject: [PATCH 1/3] Initial plan From 67c299789b79d6fc1d9483de2c3dc566c65de896 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 17 Nov 2025 07:35:20 +0000 Subject: [PATCH 2/3] Add ContentType support for Blob and Chunk resources - Add ContentType field to BlobStatus to store content type from source - Add SourceResponseHeadersToDestination field to ChunkSpec to specify headers to forward - Update blob_from_chunk_controller to extract ContentType from HEAD responses - Update blob_to_chunk_controller to set SourceResponseHeadersToDestination with Content-Type - Update chunk_runner to forward specified source headers to destination requests - Regenerate API code (deepcopy, openapi, clientset, etc.) Co-authored-by: wzshiming <6565744+wzshiming@users.noreply.github.com> --- pkg/apis/task/v1alpha1/blob_types.go | 4 ++ pkg/apis/task/v1alpha1/chunk_types.go | 5 +++ .../task/v1alpha1/zz_generated.deepcopy.go | 5 +++ pkg/controller/blob_from_chunk_controller.go | 4 ++ pkg/controller/blob_to_chunk_controller.go | 10 +++++ pkg/openapi/zz_generated.openapi.go | 22 +++++++++++ pkg/runner/chunk_runner.go | 39 ++++++++++++------- 7 files changed, 76 insertions(+), 13 deletions(-) diff --git a/pkg/apis/task/v1alpha1/blob_types.go b/pkg/apis/task/v1alpha1/blob_types.go index a5e09d3..fa87ee6 100644 --- a/pkg/apis/task/v1alpha1/blob_types.go +++ b/pkg/apis/task/v1alpha1/blob_types.go @@ -75,6 +75,10 @@ type BlobStatus struct { // AcceptRanges represents the accept-ranges header of the response. AcceptRanges bool `json:"acceptRanges,omitempty"` + // ContentType is the content type of the blob from the source response. + // +optional + ContentType string `json:"contentType,omitempty"` + // Progress is the progress of the blob. Progress int64 `json:"progress,omitempty"` diff --git a/pkg/apis/task/v1alpha1/chunk_types.go b/pkg/apis/task/v1alpha1/chunk_types.go index 658832a..8631a6f 100644 --- a/pkg/apis/task/v1alpha1/chunk_types.go +++ b/pkg/apis/task/v1alpha1/chunk_types.go @@ -162,6 +162,11 @@ type ChunkSpec struct { // Destination is the destination of the chunk. Destination []ChunkHTTP `json:"destination,omitempty"` + // SourceResponseHeadersToDestination specifies which headers from the source response + // should be forwarded to destination requests (e.g., ["Content-Type", "Content-Encoding"]). + // +optional + SourceResponseHeadersToDestination []string `json:"sourceResponseHeadersToDestination,omitempty"` + // Priority represents the relative importance of this chunk when multiple chunks exist. Priority int64 `json:"priority"` diff --git a/pkg/apis/task/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/task/v1alpha1/zz_generated.deepcopy.go index f24494d..9de9263 100644 --- a/pkg/apis/task/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/task/v1alpha1/zz_generated.deepcopy.go @@ -434,6 +434,11 @@ func (in *ChunkSpec) DeepCopyInto(out *ChunkSpec) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.SourceResponseHeadersToDestination != nil { + in, out := &in.SourceResponseHeadersToDestination, &out.SourceResponseHeadersToDestination + *out = make([]string, len(*in)) + copy(*out, *in) + } return } diff --git a/pkg/controller/blob_from_chunk_controller.go b/pkg/controller/blob_from_chunk_controller.go index 10de72e..b16637d 100644 --- a/pkg/controller/blob_from_chunk_controller.go +++ b/pkg/controller/blob_from_chunk_controller.go @@ -260,6 +260,10 @@ func (c *BlobFromChunkController) fromHeadChunk(ctx context.Context, blob *v1alp blob.Status.AcceptRanges = chunk.Status.SourceResponse.Headers["accept-ranges"] == "bytes" } } + // Extract ContentType from source response headers + if contentType := chunk.Status.SourceResponse.Headers["content-type"]; contentType != "" { + blob.Status.ContentType = contentType + } blob.Status.Phase = v1alpha1.BlobPhaseRunning case v1alpha1.ChunkPhaseFailed: blob.Status.Retry = chunk.Status.Retry diff --git a/pkg/controller/blob_to_chunk_controller.go b/pkg/controller/blob_to_chunk_controller.go index d89b0b9..3f38b88 100644 --- a/pkg/controller/blob_to_chunk_controller.go +++ b/pkg/controller/blob_to_chunk_controller.go @@ -369,6 +369,11 @@ func (c *BlobToChunkController) toOneChunk(ctx context.Context, blob *v1alpha1.B }, } + // Set headers to forward from source to destination if ContentType is known + if blob.Status.ContentType != "" { + chunk.Spec.SourceResponseHeadersToDestination = []string{"Content-Type"} + } + for _, dst := range blob.Spec.Destination { s3 := c.s3[dst.Name] if s3 == nil { @@ -508,6 +513,11 @@ func (c *BlobToChunkController) buildChunk(blob *v1alpha1.Blob, name string, num }, } + // Set headers to forward from source to destination if ContentType is known + if blob.Status.ContentType != "" { + chunk.Spec.SourceResponseHeadersToDestination = []string{"Content-Type"} + } + for j, dst := range blob.Spec.Destination { s3 := c.s3[dst.Name] if s3 == nil { diff --git a/pkg/openapi/zz_generated.openapi.go b/pkg/openapi/zz_generated.openapi.go index 9e7a72e..0df2d3b 100644 --- a/pkg/openapi/zz_generated.openapi.go +++ b/pkg/openapi/zz_generated.openapi.go @@ -679,6 +679,13 @@ func schema_pkg_apis_task_v1alpha1_BlobStatus(ref common.ReferenceCallback) comm Format: "", }, }, + "contentType": { + SchemaProps: spec.SchemaProps{ + Description: "ContentType is the content type of the blob from the source response.", + Type: []string{"string"}, + Format: "", + }, + }, "progress": { SchemaProps: spec.SchemaProps{ Description: "Progress is the progress of the blob.", @@ -1009,6 +1016,21 @@ func schema_pkg_apis_task_v1alpha1_ChunkSpec(ref common.ReferenceCallback) commo }, }, }, + "sourceResponseHeadersToDestination": { + SchemaProps: spec.SchemaProps{ + Description: "SourceResponseHeadersToDestination specifies which headers from the source response should be forwarded to destination requests (e.g., [\"Content-Type\", \"Content-Encoding\"]).", + Type: []string{"array"}, + Items: &spec.SchemaOrArray{ + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Default: "", + Type: []string{"string"}, + Format: "", + }, + }, + }, + }, + }, "priority": { SchemaProps: spec.SchemaProps{ Description: "Priority represents the relative importance of this chunk when multiple chunks exist.", diff --git a/pkg/runner/chunk_runner.go b/pkg/runner/chunk_runner.go index be66da9..38d2240 100644 --- a/pkg/runner/chunk_runner.go +++ b/pkg/runner/chunk_runner.go @@ -317,7 +317,7 @@ func (r *ChunkRunner) tryAddBearer(ctx context.Context, chunk *v1alpha1.Chunk) e return nil } -func (r *ChunkRunner) sourceRequest(ctx context.Context, chunk *v1alpha1.Chunk, s *state) (io.ReadCloser, int64) { +func (r *ChunkRunner) sourceRequest(ctx context.Context, chunk *v1alpha1.Chunk, s *state) (io.ReadCloser, int64, map[string]string) { err := r.tryAddBearer(ctx, chunk) if err != nil { if errors.Is(err, ErrBearerNotReady) { @@ -329,13 +329,13 @@ func (r *ChunkRunner) sourceRequest(ctx context.Context, chunk *v1alpha1.Chunk, ss.Status.Conditions = nil return ss }) - return nil, 0 + return nil, 0, nil } else if errors.Is(err, ErrAuthentication) { s.handleProcessError("AuthenticationError", err) } else { s.handleProcessError("BearerFetchError", err) } - return nil, 0 + return nil, 0, nil } srcReq, err := r.buildRequest(ctx, &chunk.Spec.Source, nil, 0) @@ -346,7 +346,7 @@ func (r *ChunkRunner) sourceRequest(ctx context.Context, chunk *v1alpha1.Chunk, } else { s.handleProcessError("BuildRequestError", err) } - return nil, 0 + return nil, 0, nil } srcResp, err := r.httpClient.Do(srcReq) @@ -360,7 +360,7 @@ func (r *ChunkRunner) sourceRequest(ctx context.Context, chunk *v1alpha1.Chunk, } else { s.handleProcessError("SourceRequestError", err) } - return nil, 0 + return nil, 0, nil } headers := map[string]string{} @@ -392,7 +392,7 @@ func (r *ChunkRunner) sourceRequest(ctx context.Context, chunk *v1alpha1.Chunk, if srcResp.Body != nil { srcResp.Body.Close() } - return nil, 0 + return nil, 0, nil } } else { if srcResp.StatusCode >= http.StatusMultipleChoices { @@ -409,7 +409,7 @@ func (r *ChunkRunner) sourceRequest(ctx context.Context, chunk *v1alpha1.Chunk, if srcResp.Body != nil { srcResp.Body.Close() } - return nil, 0 + return nil, 0, nil } } @@ -422,7 +422,7 @@ func (r *ChunkRunner) sourceRequest(ctx context.Context, chunk *v1alpha1.Chunk, if srcResp.Body != nil { srcResp.Body.Close() } - return nil, 0 + return nil, 0, nil } for k, v := range chunk.Spec.Source.Response.Headers { @@ -434,14 +434,27 @@ func (r *ChunkRunner) sourceRequest(ctx context.Context, chunk *v1alpha1.Chunk, if srcResp.Body != nil { srcResp.Body.Close() } - return nil, 0 + return nil, 0, nil } } - return srcResp.Body, srcResp.ContentLength + return srcResp.Body, srcResp.ContentLength, headers } -func (r *ChunkRunner) destinationRequest(ctx context.Context, dest *v1alpha1.ChunkHTTP, dr *swmrCount, contentLength int64) (string, error) { +func (r *ChunkRunner) destinationRequest(ctx context.Context, chunk *v1alpha1.Chunk, dest *v1alpha1.ChunkHTTP, dr *swmrCount, contentLength int64, sourceHeaders map[string]string) (string, error) { + // Apply headers from source response to destination request if specified + if len(chunk.Spec.SourceResponseHeadersToDestination) > 0 && sourceHeaders != nil { + if dest.Request.Headers == nil { + dest.Request.Headers = make(map[string]string) + } + for _, headerName := range chunk.Spec.SourceResponseHeadersToDestination { + headerNameLower := strings.ToLower(headerName) + if value, exists := sourceHeaders[headerNameLower]; exists { + dest.Request.Headers[headerName] = value + } + } + } + destReq, err := r.buildRequest(ctx, dest, dr.NewReader(), contentLength) if err != nil { if retry, err := utils.IsNetworkError(err); !retry { @@ -519,7 +532,7 @@ func (r *ChunkRunner) process(continues <-chan struct{}, chunk *v1alpha1.Chunk) stopProgress := r.startProgressUpdater(ctx, s, &gsr, &gdrs) defer stopProgress() - body, contentLength := r.sourceRequest(ctx, chunk, s) + body, contentLength, sourceHeaders := r.sourceRequest(ctx, chunk, s) if body == nil { return } @@ -614,7 +627,7 @@ func (r *ChunkRunner) process(continues <-chan struct{}, chunk *v1alpha1.Chunk) i := i dr := drs[i] g.Go(func() error { - etag, err := r.destinationRequest(ctx, &dest, dr, contentLength) + etag, err := r.destinationRequest(ctx, chunk, &dest, dr, contentLength, sourceHeaders) if err != nil { return err } From c49eb78ca7e9c88ca530a6ac333fabcc44606188 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 17 Nov 2025 07:40:22 +0000 Subject: [PATCH 3/3] Add unit test for ContentType extraction Add TestContentTypeExtraction to verify that ContentType is correctly extracted from source response headers Co-authored-by: wzshiming <6565744+wzshiming@users.noreply.github.com> --- .../blob_from_chunk_controller_test.go | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/pkg/controller/blob_from_chunk_controller_test.go b/pkg/controller/blob_from_chunk_controller_test.go index 32043ae..0fcd6d4 100644 --- a/pkg/controller/blob_from_chunk_controller_test.go +++ b/pkg/controller/blob_from_chunk_controller_test.go @@ -334,3 +334,102 @@ func TestForceAcceptRanges(t *testing.T) { }) } } + +// TestContentTypeExtraction verifies that ContentType is correctly extracted from source response +func TestContentTypeExtraction(t *testing.T) { + tests := []struct { + name string + blob *v1alpha1.Blob + chunk *v1alpha1.Chunk + expectedContentType string + }{ + { + name: "ContentType extracted from HEAD response", + blob: &v1alpha1.Blob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-blob-with-content-type", + }, + Status: v1alpha1.BlobStatus{ + Phase: v1alpha1.BlobPhasePending, + }, + }, + chunk: &v1alpha1.Chunk{ + ObjectMeta: metav1.ObjectMeta{ + Name: "blob:head:test-blob-with-content-type:0", + }, + Status: v1alpha1.ChunkStatus{ + Phase: v1alpha1.ChunkPhaseSucceeded, + SourceResponse: &v1alpha1.ChunkHTTPResponse{ + Headers: map[string]string{ + "content-length": "1024", + "content-type": "application/octet-stream", + "accept-ranges": "bytes", + }, + }, + }, + }, + expectedContentType: "application/octet-stream", + }, + { + name: "No ContentType in response", + blob: &v1alpha1.Blob{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-blob-no-content-type", + }, + Status: v1alpha1.BlobStatus{ + Phase: v1alpha1.BlobPhasePending, + }, + }, + chunk: &v1alpha1.Chunk{ + ObjectMeta: metav1.ObjectMeta{ + Name: "blob:head:test-blob-no-content-type:0", + }, + Status: v1alpha1.ChunkStatus{ + Phase: v1alpha1.ChunkPhaseSucceeded, + SourceResponse: &v1alpha1.ChunkHTTPResponse{ + Headers: map[string]string{ + "content-length": "512", + "accept-ranges": "bytes", + }, + }, + }, + }, + expectedContentType: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctx := context.Background() + + // Setup fake client and informers + scheme := runtime.NewScheme() + _ = v1alpha1.AddToScheme(scheme) + + client := fake.NewSimpleClientset(tt.blob, tt.chunk) + informerFactory := externalversions.NewSharedInformerFactory(client, 0) + + controller := NewBlobFromChunkController( + "test-handler", + map[string]*sss.SSS{}, + client, + informerFactory, + ) + + // Manually populate the informer cache + informerFactory.Task().V1alpha1().Blobs().Informer().GetStore().Add(tt.blob) + informerFactory.Task().V1alpha1().Chunks().Informer().GetStore().Add(tt.chunk) + + // Call fromHeadChunk + err := controller.fromHeadChunk(ctx, tt.blob) + if err != nil { + t.Fatalf("fromHeadChunk failed: %v", err) + } + + // Verify the ContentType field is set correctly + if tt.blob.Status.ContentType != tt.expectedContentType { + t.Errorf("Expected ContentType to be %q, got %q", tt.expectedContentType, tt.blob.Status.ContentType) + } + }) + } +}