
Commit 052080c

Fix syncing to S3 without a prefix (#558)
**Why?** When the sync_to_s3.py script is asked to sync files without a prefix set, it instructs the S3 list-objects-v2 API to list all objects with the prefix "/". That request returns nothing, so the script assumes the files are missing and uploads them again.

**What?**
* Added logic to only add the prefix to the listing call if it is set.
* Added a test to validate that the lookup without a prefix works as intended.
1 parent 5ee4a94 commit 052080c
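
For context, a minimal boto3 sketch of the behaviour described above (not part of this commit; the bucket name and keys are hypothetical): ordinary object keys do not start with a slash, so `Prefix="/"` matches nothing, while `Prefix=""` lists the whole bucket.

```python
import boto3

# Hypothetical bucket purely for illustration; not taken from the commit.
s3_client = boto3.client("s3")
paginator = s3_client.get_paginator("list_objects_v2")

# Before the fix: an unset prefix was turned into "/", which matches no ordinary keys,
# so the sync script saw an "empty" bucket and re-uploaded everything.
pages_with_slash = paginator.paginate(Bucket="example-bucket", Prefix="/")

# After the fix: with no prefix configured, Prefix="" is passed and every object is listed.
pages_without_prefix = paginator.paginate(Bucket="example-bucket", Prefix="")

for page in pages_without_prefix:
    for s3_object in page.get("Contents", []):
        print(s3_object["Key"])
```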

2 files changed: 85 additions, 1 deletion


src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared/helpers/sync_to_s3.py

Lines changed: 3 additions & 1 deletion
@@ -416,7 +416,9 @@ def _get_recursive_s3_objects(
     s3_list_objects_paginator = s3_client.get_paginator("list_objects_v2")
     s3_object_iterator = s3_list_objects_paginator.paginate(
         Bucket=s3_bucket,
-        Prefix=f"{s3_prefix}/",
+        Prefix=(
+            f"{s3_prefix}/" if s3_prefix else ""
+        ),
     )
     s3_objects = {}
     for response_data in s3_object_iterator:
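
The behaviour of the new `Prefix` expression can be sketched in isolation (a hypothetical helper, not code from the repository):

```python
def build_list_prefix(s3_prefix: str) -> str:
    """Mirror the conditional above: only append '/' when a prefix is set."""
    return f"{s3_prefix}/" if s3_prefix else ""

# With a prefix, objects are listed under "<prefix>/".
assert build_list_prefix("adf-build") == "adf-build/"
# Without a prefix, an empty string lists the whole bucket instead of the dead-end "/".
assert build_list_prefix("") == ""
```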

src/lambda_codebase/initial_commit/bootstrap_repository/adf-build/shared/helpers/tests/test_sync_to_s3.py

Lines changed: 82 additions & 0 deletions
@@ -389,6 +389,88 @@ def test_get_s3_objects_non_recursive_missing_object():
     ) == {}
 
 
+def test_get_s3_objects_without_prefix():
+    s3_client = Mock()
+    s3_bucket = "your-bucket"
+    s3_prefix = ""
+    example_s3_objects = dict(map(
+        lambda kv: (
+            kv[0],
+            {
+                # Remove the prefix from the key
+                "key": kv[1]["key"][kv[1]["key"].find("/") + 1:],
+                "metadata": kv[1]["metadata"],
+            }
+        ),
+        deepcopy(EXAMPLE_S3_OBJECTS).items(),
+    ))
+    file_extensions = [".yml", ".yaml"]
+
+    paginator = Mock()
+    s3_client.get_paginator.return_value = paginator
+
+    s3_obj_keys = list(map(
+        lambda obj: {
+            "Key": obj["key"],
+        },
+        example_s3_objects.values(),
+    ))
+    s3_obj_data = dict(map(
+        lambda obj: (
+            obj["key"],
+            {
+                "Key": obj["key"],
+                "Metadata": obj["metadata"],
+            }
+        ),
+        example_s3_objects.values(),
+    ))
+    paginator.paginate.return_value = [
+        {
+            "Contents": s3_obj_keys[:2],
+        },
+        {
+            "Contents": [
+                {
+                    "Key": "README.md",
+                },
+                {
+                    "Key": "other-file.json",
+                }
+            ],
+        },
+        {
+            "Contents": s3_obj_keys[2:],
+        },
+    ]
+    s3_client.head_object.side_effect = (
+        lambda **kwargs: s3_obj_data[kwargs["Key"]]
+    )
+
+    assert get_s3_objects(
+        s3_client,
+        s3_bucket,
+        s3_prefix,
+        file_extensions,
+        recursive=True,
+    ) == example_s3_objects
+
+    s3_client.get_paginator.assert_called_once_with("list_objects_v2")
+    paginator.paginate.assert_called_once_with(
+        Bucket=s3_bucket,
+        Prefix="",
+    )
+    s3_client.head_object.assert_has_calls(
+        list(map(
+            lambda obj: call(
+                Bucket=s3_bucket,
+                Key=obj.get("key"),
+            ),
+            example_s3_objects.values(),
+        )),
+    )
+
+
 def test_get_s3_objects_recursive_success():
     s3_client = Mock()
     s3_bucket = "your-bucket"
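
As a standalone illustration of the mocking pattern the new test relies on (a sketch with made-up keys, not code from the test suite), the paginator is a `Mock` whose `paginate` call returns pre-baked result pages:

```python
from unittest.mock import Mock

s3_client = Mock()
paginator = Mock()
s3_client.get_paginator.return_value = paginator

# Two fake result pages, shaped like list_objects_v2 responses; keys are made up.
paginator.paginate.return_value = [
    {"Contents": [{"Key": "deployment_map.yml"}]},
    {"Contents": [{"Key": "deployment_maps/account.yml"}]},
]

pages = s3_client.get_paginator("list_objects_v2").paginate(
    Bucket="your-bucket",
    Prefix="",
)
keys = [obj["Key"] for page in pages for obj in page.get("Contents", [])]
assert keys == ["deployment_map.yml", "deployment_maps/account.yml"]
```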
