Skip to content

Commit f053321

Browse files
spencerschrocknathaniel.wert
authored andcommitted
🌱 cron: support reading prefix from file for controller input files (7/n) (ossf#2445)
* add prefix marker file to config Signed-off-by: Spencer Schrock <[email protected]> * Read the new config values, if they exist. Signed-off-by: Spencer Schrock <[email protected]> * Add function to fetch prefix file config value. Signed-off-by: Spencer Schrock <[email protected]> * Read prefix file if prefix not set. Signed-off-by: Spencer Schrock <[email protected]> * Add tests to verify how List works with various prefixes Signed-off-by: Spencer Schrock <[email protected]> * Add tests for getPrefix Signed-off-by: Spencer Schrock <[email protected]> * Remove panics from iterator helper functions Signed-off-by: Spencer Schrock <[email protected]> Signed-off-by: Spencer Schrock <[email protected]> Signed-off-by: nathaniel.wert <[email protected]>
1 parent 07b5c15 commit f053321

9 files changed

Lines changed: 305 additions & 63 deletions

File tree

‎cron/config/config.go‎

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,10 @@ const (
3737
// TransferStatusFilename file identifies if shard transfer to BigQuery is completed.
3838
TransferStatusFilename string = ".transfer_complete"
3939

40-
configFlag string = "config"
41-
configDefault string = ""
42-
configUsage string = "Location of config file. Required"
40+
configFlag string = "config"
41+
configDefault string = ""
42+
configUsage string = "Location of config file. Required"
43+
inputBucketParams string = "input-bucket"
4344

4445
projectID string = "SCORECARD_PROJECT_ID"
4546
requestTopicURL string = "SCORECARD_REQUEST_TOPIC_URL"
@@ -300,16 +301,36 @@ func GetAPIResultsBucketURL() (string, error) {
300301

301302
// GetInputBucketURL() returns the bucket URL for input files.
302303
func GetInputBucketURL() (string, error) {
303-
return getStringConfigValue(inputBucketURL, configYAML, "InputBucketURL", "input-bucket-url")
304+
bucketParams, err := GetAdditionalParams(inputBucketParams)
305+
bURL, ok := bucketParams["url"]
306+
if err != nil || !ok {
307+
// TODO temporarily falling back to old variables until changes propagate to production
308+
return getStringConfigValue(inputBucketURL, configYAML, "InputBucketURL", "input-bucket-url")
309+
}
310+
return bURL, nil
304311
}
305312

306313
// GetInputBucketPrefix() returns the prefix used when fetching files from a bucket.
307314
func GetInputBucketPrefix() (string, error) {
308-
prefix, err := getStringConfigValue(inputBucketPrefix, configYAML, "InputBucketPrefix", "input-bucket-prefix")
309-
if err != nil && !errors.Is(err, ErrorEmptyConfigValue) {
310-
return "", err
315+
bucketParams, err := GetAdditionalParams(inputBucketParams)
316+
if err != nil {
317+
// TODO temporarily falling back to old variables until changes propagate to production
318+
prefix, err := getStringConfigValue(inputBucketPrefix, configYAML, "InputBucketPrefix", "input-bucket-prefix")
319+
if err != nil && !errors.Is(err, ErrorEmptyConfigValue) {
320+
return "", err
321+
}
322+
return prefix, nil
323+
}
324+
return bucketParams["prefix"], nil
325+
}
326+
327+
// GetInputBucketPrefixFile() returns the file whose contents specify the prefix to use.
328+
func GetInputBucketPrefixFile() (string, error) {
329+
bucketParams, err := GetAdditionalParams(inputBucketParams)
330+
if err != nil {
331+
return "", fmt.Errorf("getting config for %s: %w", inputBucketParams, err)
311332
}
312-
return prefix, nil
333+
return bucketParams["prefix-file"], nil
313334
}
314335

315336
func GetAdditionalParams(subMapName string) (map[string]string, error) {

‎cron/config/config.yaml‎

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,20 @@ shard-size: 10
2222
webhook-url:
2323
metric-exporter: stackdriver
2424
result-data-bucket-url: gs://ossf-scorecard-data2
25+
26+
# TODO temporarily leaving old variables until changes propagate to production
2527
input-bucket-url: gs://ossf-scorecard-input-projects
2628
# Can be used to specify directories within a bucket. Can be empty.
2729
input-bucket-prefix:
2830

2931
additional-params:
30-
criticality:
32+
input-bucket:
33+
url: gs://ossf-scorecard-input-projects
34+
# Optional prefix to limit files used as input files within a bucket (e.g. a specific file or directory)
35+
prefix:
36+
# Optional file to read a prefix from, instead of statically defining prefix above (note: prefix must be blank to use this option)
37+
# This is good in situations where the prefix changes frequently (e.g. always using the most recent folder in a bucket)
38+
prefix-file:
3139

3240
scorecard:
3341
# API results bucket

‎cron/config/config_test.go‎

Lines changed: 52 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -38,25 +38,30 @@ const (
3838
prodShardSize int = 10
3939
prodMetricExporter string = "stackdriver"
4040
// Raw results.
41-
prodRawBucket = "gs://ossf-scorecard-rawdata"
42-
prodRawBigQueryTable = "scorecard-rawdata"
43-
prodAPIBucketURL = "gs://ossf-scorecard-cron-results"
44-
prodInputBucketURL = "gs://ossf-scorecard-input-projects"
45-
prodInputBucketPrefix = ""
41+
prodRawBucket = "gs://ossf-scorecard-rawdata"
42+
prodRawBigQueryTable = "scorecard-rawdata"
43+
prodAPIBucketURL = "gs://ossf-scorecard-cron-results"
44+
prodInputBucketURL = "gs://ossf-scorecard-input-projects"
45+
prodInputBucketPrefix = ""
46+
prodInputBucketPrefixFile = ""
4647
)
4748

4849
var (
50+
prodInputBucketParams = map[string]string{
51+
"url": prodInputBucketURL,
52+
"prefix": prodInputBucketPrefix,
53+
"prefix-file": prodInputBucketPrefixFile,
54+
}
4955
prodScorecardParams = map[string]string{
5056
"api-results-bucket-url": prodAPIBucketURL,
5157
"blacklisted-checks": prodBlacklistedChecks,
5258
"cii-data-bucket-url": prodCIIDataBucket,
5359
"raw-bigquery-table": prodRawBigQueryTable,
5460
"raw-result-data-bucket-url": prodRawBucket,
5561
}
56-
prodCriticalityParams map[string]string = nil
57-
prodAdditionalParams = map[string]map[string]string{
58-
"scorecard": prodScorecardParams,
59-
"criticality": prodCriticalityParams,
62+
prodAdditionalParams = map[string]map[string]string{
63+
"input-bucket": prodInputBucketParams,
64+
"scorecard": prodScorecardParams,
6065
}
6166
)
6267

@@ -474,3 +479,41 @@ func TestEnvVarName(t *testing.T) {
474479
})
475480
}
476481
}
482+
483+
func TestGetAdditionalParams(t *testing.T) {
484+
t.Parallel()
485+
//nolint:govet
486+
tests := []struct {
487+
name string
488+
mapName string
489+
want map[string]string
490+
wantErr bool
491+
}{
492+
{
493+
name: "scorecard values",
494+
mapName: "scorecard",
495+
want: prodScorecardParams,
496+
wantErr: false,
497+
},
498+
{
499+
name: "nonexistant value",
500+
mapName: "this-value-should-never-exist",
501+
want: map[string]string{},
502+
wantErr: true,
503+
},
504+
}
505+
for _, testcase := range tests {
506+
testcase := testcase
507+
t.Run(testcase.name, func(t *testing.T) {
508+
t.Parallel()
509+
got, err := GetAdditionalParams(testcase.mapName)
510+
if testcase.wantErr != (err != nil) {
511+
t.Fatalf("unexpected error value for GetAdditionalParams: %v", err)
512+
}
513+
if !cmp.Equal(got, testcase.want) {
514+
diff := cmp.Diff(got, testcase.want)
515+
t.Errorf("test failed: expected - %v, got - %v. \n%s", testcase.want, got, diff)
516+
}
517+
})
518+
}
519+
}

‎cron/data/blob_test.go‎

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,11 +128,26 @@ func TestBlobKeysPrefix(t *testing.T) {
128128
{
129129
name: "no prefix",
130130
prefix: "",
131-
want: []string{"key1.txt", "key2.txt", "key3.txt", "subdir/key4.txt"},
131+
want: []string{"key1.txt", "key2.txt", "key3.txt", "subdir/key4.txt", "subdir/nested/key5.txt"},
132132
},
133133
{
134134
name: "subdir prefix",
135135
prefix: "subdir/",
136+
want: []string{"subdir/key4.txt", "subdir/nested/key5.txt"},
137+
},
138+
{
139+
name: "subdir prefix no terminal slash",
140+
prefix: "subdir",
141+
want: []string{"subdir/key4.txt", "subdir/nested/key5.txt"},
142+
},
143+
{
144+
name: "nested prefix",
145+
prefix: "subdir/nested/",
146+
want: []string{"subdir/nested/key5.txt"},
147+
},
148+
{
149+
name: "file prefix",
150+
prefix: "subdir/key4.txt",
136151
want: []string{"subdir/key4.txt"},
137152
},
138153
}

‎cron/data/testdata/blob_test/subdir/nested/key5.txt‎

Whitespace-only changes.
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
// Copyright 2022 Security Scorecard Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
// Package main implements the PubSub controller.
16+
package main
17+
18+
import (
19+
"bytes"
20+
"context"
21+
"fmt"
22+
"strings"
23+
24+
"github.com/ossf/scorecard/v4/cron/config"
25+
"github.com/ossf/scorecard/v4/cron/data"
26+
)
27+
28+
// getPrefix returns the prefix used when reading input files from a bucket.
29+
// If "prefix" is set, the value is used irrespective of the value of "prefix-file".
30+
// Otherwise, the contents of "prefix-file" (if set) are used.
31+
func getPrefix(ctx context.Context, bucket string) (string, error) {
32+
prefix, err := config.GetInputBucketPrefix()
33+
if err != nil {
34+
return "", fmt.Errorf("config.GetInputBucketPrefix: %w", err)
35+
}
36+
if prefix != "" {
37+
// prioritize prefix if set
38+
return prefix, nil
39+
}
40+
41+
prefixFile, err := config.GetInputBucketPrefixFile()
42+
if err != nil {
43+
return "", fmt.Errorf("config.GetInputBucketPrefixFile: %w", err)
44+
}
45+
if prefixFile == "" {
46+
// cant read a file which doesnt exist, but the value is optional so no error
47+
return "", nil
48+
}
49+
50+
b, err := data.GetBlobContent(ctx, bucket, prefixFile)
51+
if err != nil {
52+
return "", fmt.Errorf("fetching contents of prefix-file: %w", err)
53+
}
54+
s := string(b)
55+
return strings.TrimSpace(s), nil
56+
}
57+
58+
func bucketFiles(ctx context.Context) (data.Iterator, error) {
59+
var iters []data.Iterator
60+
61+
bucket, err := config.GetInputBucketURL()
62+
if err != nil {
63+
return nil, fmt.Errorf("config.GetInputBucketURL: %w", err)
64+
}
65+
prefix, err := getPrefix(ctx, bucket)
66+
if err != nil {
67+
return nil, fmt.Errorf("getPrefix: %w", err)
68+
}
69+
70+
files, err := data.GetBlobKeysWithPrefix(ctx, bucket, prefix)
71+
if err != nil {
72+
return nil, fmt.Errorf("data.GetBlobKeysWithPrefix: %w", err)
73+
}
74+
75+
for _, f := range files {
76+
b, err := data.GetBlobContent(ctx, bucket, f)
77+
if err != nil {
78+
return nil, fmt.Errorf("data.GetBlobContent: %w", err)
79+
}
80+
r := bytes.NewReader(b)
81+
i, err := data.MakeIteratorFrom(r)
82+
if err != nil {
83+
return nil, fmt.Errorf("data.MakeIteratorFrom: %w", err)
84+
}
85+
iters = append(iters, i)
86+
}
87+
iter, err := data.MakeNestedIterator(iters)
88+
if err != nil {
89+
return nil, fmt.Errorf("data.MakeNestedIterator: %w", err)
90+
}
91+
return iter, nil
92+
}
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
// Copyright 2022 Security Scorecard Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package main
16+
17+
import (
18+
"context"
19+
"path/filepath"
20+
"testing"
21+
)
22+
23+
//nolint:tparallel,paralleltest // since t.Setenv is used
24+
func TestGetPrefix(t *testing.T) {
25+
//nolint:govet
26+
testcases := []struct {
27+
name string
28+
url string
29+
prefix string
30+
prefixFile string
31+
want string
32+
wantErr bool
33+
}{
34+
{
35+
name: "no prefix",
36+
url: "testdata/getPrefix",
37+
prefix: "",
38+
prefixFile: "",
39+
want: "",
40+
wantErr: false,
41+
},
42+
{
43+
name: "prefix set",
44+
url: "testdata/getPrefix",
45+
prefix: "foo",
46+
prefixFile: "",
47+
want: "foo",
48+
wantErr: false,
49+
},
50+
{
51+
name: "prefix file set",
52+
url: "testdata/getPrefix",
53+
prefix: "",
54+
prefixFile: "marker",
55+
want: "bar",
56+
wantErr: false,
57+
},
58+
{
59+
name: "prefix and prefix file set",
60+
url: "testdata/getPrefix",
61+
prefix: "foo",
62+
prefixFile: "marker",
63+
want: "foo",
64+
wantErr: false,
65+
},
66+
{
67+
name: "non existent prefix file",
68+
url: "testdata/getPrefix",
69+
prefix: "",
70+
prefixFile: "baz",
71+
want: "",
72+
wantErr: true,
73+
},
74+
}
75+
76+
for _, testcase := range testcases {
77+
testcase := testcase
78+
t.Run(testcase.name, func(t *testing.T) {
79+
t.Setenv("INPUT_BUCKET_URL", testcase.url)
80+
t.Setenv("INPUT_BUCKET_PREFIX", testcase.prefix)
81+
t.Setenv("INPUT_BUCKET_PREFIX_FILE", testcase.prefixFile)
82+
testdataPath, err := filepath.Abs(testcase.url)
83+
if err != nil {
84+
t.Errorf("unexpected error: %v", err)
85+
}
86+
got, err := getPrefix(context.Background(), "file:///"+testdataPath)
87+
if (err != nil) != testcase.wantErr {
88+
t.Errorf("unexpected error produced: %v", err)
89+
}
90+
if got != testcase.want {
91+
t.Errorf("test failed: expected - %s, got = %s", testcase.want, got)
92+
}
93+
})
94+
}
95+
}

0 commit comments

Comments
 (0)