From 94f247d4558a89ddb53067843c6827684da3ef4d Mon Sep 17 00:00:00 2001 From: sushmith Date: Tue, 15 Nov 2022 12:15:37 +0530 Subject: [PATCH 1/6] feat(bigquery): use service_account_base64 to pass credentials * use `service_account_base64` config that will take precedence over service_account_json in bigquery extractor --- plugins/extractors/bigquery/bigquery.go | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/plugins/extractors/bigquery/bigquery.go b/plugins/extractors/bigquery/bigquery.go index df8d4c3ed..7ac76bf03 100644 --- a/plugins/extractors/bigquery/bigquery.go +++ b/plugins/extractors/bigquery/bigquery.go @@ -3,7 +3,9 @@ package bigquery import ( "context" _ "embed" // used to print the embedded assets + "encoding/base64" "encoding/json" + "fmt" "html/template" "strings" "sync" @@ -30,7 +32,9 @@ var summary string // Config holds the set of configuration for the bigquery extractor type Config struct { - ProjectID string `mapstructure:"project_id" validate:"required"` + ProjectID string `mapstructure:"project_id" validate:"required"` + //ServiceAccountBase64 takes precedence over ServiceAccountJSON field + ServiceAccountBase64 string `mapstructure:"service_account_base64"` ServiceAccountJSON string `mapstructure:"service_account_json"` TablePattern string `mapstructure:"table_pattern"` IncludeColumnProfile bool `mapstructure:"include_column_profile"` @@ -44,6 +48,7 @@ var sampleConfig = ` project_id: google-project-id table_pattern: gofood.fact_ include_column_profile: true +service_account_base64: ____base64_encoded_service_account____ service_account_json: |- { "type": "service_account", @@ -137,11 +142,20 @@ func (e *Extractor) Extract(ctx context.Context, emit plugins.Emit) (err error) // Create big query client func (e *Extractor) createClient(ctx context.Context) (*bigquery.Client, error) { - if e.config.ServiceAccountJSON == "" { + if e.config.ServiceAccountBase64 == "" && e.config.ServiceAccountJSON == "" { e.logger.Info("credentials are not specified, creating bigquery client using default credentials...") return bigquery.NewClient(ctx, e.config.ProjectID) } + if e.config.ServiceAccountBase64 != "" { + serviceAccountJSON, err := base64.StdEncoding.DecodeString(e.config.ServiceAccountBase64) + if err != nil { + e.logger.Fatal(fmt.Sprintf("failed to decode base64 service account, err : %v\n", err)) + } + // overwrite ServiceAccountJSON with credentials from ServiceAccountBase64 value + e.config.ServiceAccountJSON = string(serviceAccountJSON) + } + return bigquery.NewClient(ctx, e.config.ProjectID, option.WithCredentialsJSON([]byte(e.config.ServiceAccountJSON))) } From acf78b0f7f6a1c82a1523985d8946aad3f411949 Mon Sep 17 00:00:00 2001 From: sushmith Date: Tue, 15 Nov 2022 12:16:58 +0530 Subject: [PATCH 2/6] docs: fix incorrect config in readme --- plugins/extractors/bigquery/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/extractors/bigquery/README.md b/plugins/extractors/bigquery/README.md index 780fb57ec..2f89f373f 100644 --- a/plugins/extractors/bigquery/README.md +++ b/plugins/extractors/bigquery/README.md @@ -9,7 +9,7 @@ source: project_id: google-project-id table_pattern: gofood.fact_ profile_column: true - credentials_json: + service_account_json: { "type": "service_account", "private_key_id": "xxxxxxx", @@ -33,7 +33,7 @@ source: | Key | Value | Example | Description | | | :-- | :---- | :------ | :---------- | :- | | `project_id` | `string` | `my-project` | BigQuery Project ID | *required* | -| `credentials_json` | `string` | `{"private_key": .., "private_id": ...}` | Service Account in JSON string | *optional* | +| `service_account_json` | `string` | `{"private_key": .., "private_id": ...}` | Service Account in JSON string | *optional* | | `table_pattern` | `string` | `gofood.fact_` | Regex pattern to filter which bigquery table to scan (whitelist) | *optional* | | `include_column_profile` | `bool` | `true` | true if you want to profile the column value such min, max, med, avg, top, and freq | *optional* | | `max_preview_rows` | `int` | `30` | max number of preview rows to fetch, `0` will skip preview fetching. Default to `30`. | *optional* | From b8717b41611159c539219d02ff2f6c9dbbb017f5 Mon Sep 17 00:00:00 2001 From: sushmith Date: Tue, 15 Nov 2022 12:22:21 +0530 Subject: [PATCH 3/6] docs: update readme with new config field --- plugins/extractors/bigquery/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/plugins/extractors/bigquery/README.md b/plugins/extractors/bigquery/README.md index 2f89f373f..d2ef2b27e 100644 --- a/plugins/extractors/bigquery/README.md +++ b/plugins/extractors/bigquery/README.md @@ -9,6 +9,7 @@ source: project_id: google-project-id table_pattern: gofood.fact_ profile_column: true + service_account_base64: _________BASE64_ENCODED_SERVICE_ACCOUNT_________________ service_account_json: { "type": "service_account", @@ -33,6 +34,7 @@ source: | Key | Value | Example | Description | | | :-- | :---- | :------ | :---------- | :- | | `project_id` | `string` | `my-project` | BigQuery Project ID | *required* | +| `service_account_base64` | `string` | `____BASE64_ENCODED_SERVICE_ACCOUNT____` | Service Account in base64 encoded string. Takes precedence over `service_account_json` value | *optional* | | `service_account_json` | `string` | `{"private_key": .., "private_id": ...}` | Service Account in JSON string | *optional* | | `table_pattern` | `string` | `gofood.fact_` | Regex pattern to filter which bigquery table to scan (whitelist) | *optional* | | `include_column_profile` | `bool` | `true` | true if you want to profile the column value such min, max, med, avg, top, and freq | *optional* | From 7a74a7728081176a2d0193ed7b96fa21e9bc7fdf Mon Sep 17 00:00:00 2001 From: Sushmith <6890568+bsushmith@users.noreply.github.com> Date: Tue, 15 Nov 2022 12:40:22 +0530 Subject: [PATCH 4/6] lint: fix lint formatting Co-authored-by: Suhas Karanth --- plugins/extractors/bigquery/bigquery.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/extractors/bigquery/bigquery.go b/plugins/extractors/bigquery/bigquery.go index 7ac76bf03..a1962745e 100644 --- a/plugins/extractors/bigquery/bigquery.go +++ b/plugins/extractors/bigquery/bigquery.go @@ -33,7 +33,7 @@ var summary string // Config holds the set of configuration for the bigquery extractor type Config struct { ProjectID string `mapstructure:"project_id" validate:"required"` - //ServiceAccountBase64 takes precedence over ServiceAccountJSON field + // ServiceAccountBase64 takes precedence over ServiceAccountJSON field ServiceAccountBase64 string `mapstructure:"service_account_base64"` ServiceAccountJSON string `mapstructure:"service_account_json"` TablePattern string `mapstructure:"table_pattern"` From 86c0b58930198d42c88f4be5db5ff141b386131a Mon Sep 17 00:00:00 2001 From: sushmith Date: Tue, 15 Nov 2022 13:03:46 +0530 Subject: [PATCH 5/6] test: add test case --- plugins/extractors/bigquery/bigquery.go | 5 ++--- plugins/extractors/bigquery/bigquery_test.go | 13 +++++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/plugins/extractors/bigquery/bigquery.go b/plugins/extractors/bigquery/bigquery.go index a1962745e..768ff35c6 100644 --- a/plugins/extractors/bigquery/bigquery.go +++ b/plugins/extractors/bigquery/bigquery.go @@ -5,7 +5,6 @@ import ( _ "embed" // used to print the embedded assets "encoding/base64" "encoding/json" - "fmt" "html/template" "strings" "sync" @@ -149,8 +148,8 @@ func (e *Extractor) createClient(ctx context.Context) (*bigquery.Client, error) if e.config.ServiceAccountBase64 != "" { serviceAccountJSON, err := base64.StdEncoding.DecodeString(e.config.ServiceAccountBase64) - if err != nil { - e.logger.Fatal(fmt.Sprintf("failed to decode base64 service account, err : %v\n", err)) + if err != nil || len(serviceAccountJSON) == 0 { + return nil, errors.Wrap(err, "failed to decode base64 service account") } // overwrite ServiceAccountJSON with credentials from ServiceAccountBase64 value e.config.ServiceAccountJSON = string(serviceAccountJSON) diff --git a/plugins/extractors/bigquery/bigquery_test.go b/plugins/extractors/bigquery/bigquery_test.go index 6b7a50a72..ef458f7b2 100644 --- a/plugins/extractors/bigquery/bigquery_test.go +++ b/plugins/extractors/bigquery/bigquery_test.go @@ -39,4 +39,17 @@ func TestInit(t *testing.T) { assert.NotEqual(t, plugins.InvalidConfigError{}, err) }) + t.Run("should return error if service_account_base64 config is invalid", func(t *testing.T) { + extr := bigquery.New(utils.Logger) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + err := extr.Init(ctx, plugins.Config{ + URNScope: "test-bigquery", + RawConfig: map[string]interface{}{ + "project_id": "google-project-id", + "service_account_base64": "----", // invalid + }}) + + assert.ErrorContains(t, err, "failed to decode base64 service account") + }) } From 2d97d6504e364ffc4c355a2ce178cbb51528aaca Mon Sep 17 00:00:00 2001 From: sushmith Date: Tue, 15 Nov 2022 17:34:53 +0530 Subject: [PATCH 6/6] docs: update sample config --- plugins/extractors/bigquery/bigquery.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/plugins/extractors/bigquery/bigquery.go b/plugins/extractors/bigquery/bigquery.go index 768ff35c6..b36eebc72 100644 --- a/plugins/extractors/bigquery/bigquery.go +++ b/plugins/extractors/bigquery/bigquery.go @@ -47,6 +47,8 @@ var sampleConfig = ` project_id: google-project-id table_pattern: gofood.fact_ include_column_profile: true +# Only one of service_account_base64 / service_account_json is needed. +# If both are present, service_account_base64 takes precedence service_account_base64: ____base64_encoded_service_account____ service_account_json: |- {