Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions plugins/extractors/bigquery/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ source:
project_id: google-project-id
table_pattern: gofood.fact_
include_column_profile: true
credentials_json:
service_account_base64: _________BASE64_ENCODED_SERVICE_ACCOUNT_________________
service_account_json: |-
{
"type": "service_account",
"private_key_id": "xxxxxxx",
Expand All @@ -33,7 +34,8 @@ source:
| Key | Value | Example | Description | |
| :-- | :---- | :------ | :---------- | :- |
| `project_id` | `string` | `my-project` | BigQuery Project ID | *required* |
| `credentials_json` | `string` | `{"private_key": .., "private_id": ...}` | Service Account in JSON string | *optional* |
| `service_account_base64` | `string` | `____BASE64_ENCODED_SERVICE_ACCOUNT____` | Service Account in base64 encoded string. Takes precedence over `service_account_json` value | *optional* |
| `service_account_json` | `string` | `{"private_key": .., "private_id": ...}` | Service Account in JSON string | *optional* |
| `table_pattern` | `string` | `gofood.fact_` | Regex pattern to filter which bigquery table to scan (whitelist) | *optional* |
| `include_column_profile` | `bool` | `true` | Set to `true` to profile column values, such as min, max, med, avg, top, and freq | *optional* |
| `max_preview_rows` | `int` | `30` | Maximum number of preview rows to fetch; `0` skips preview fetching. Defaults to `30`. | *optional* |
Expand Down
17 changes: 15 additions & 2 deletions plugins/extractors/bigquery/bigquery.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package bigquery
import (
"context"
_ "embed" // used to print the embedded assets
"encoding/base64"
"encoding/json"
"html/template"
"strings"
Expand Down Expand Up @@ -30,7 +31,9 @@ var summary string

// Config holds the set of configuration for the bigquery extractor
type Config struct {
ProjectID string `mapstructure:"project_id" validate:"required"`
ProjectID string `mapstructure:"project_id" validate:"required"`
// ServiceAccountBase64 takes precedence over ServiceAccountJSON field
ServiceAccountBase64 string `mapstructure:"service_account_base64"`
ServiceAccountJSON string `mapstructure:"service_account_json"`
TablePattern string `mapstructure:"table_pattern"`
IncludeColumnProfile bool `mapstructure:"include_column_profile"`
Expand All @@ -44,6 +47,7 @@ var sampleConfig = `
project_id: google-project-id
table_pattern: gofood.fact_
include_column_profile: true
service_account_base64: ____base64_encoded_service_account____
service_account_json: |-
{
"type": "service_account",
Expand Down Expand Up @@ -137,11 +141,20 @@ func (e *Extractor) Extract(ctx context.Context, emit plugins.Emit) (err error)

// Create big query client
func (e *Extractor) createClient(ctx context.Context) (*bigquery.Client, error) {
if e.config.ServiceAccountJSON == "" {
if e.config.ServiceAccountBase64 == "" && e.config.ServiceAccountJSON == "" {
e.logger.Info("credentials are not specified, creating bigquery client using default credentials...")
return bigquery.NewClient(ctx, e.config.ProjectID)
}

if e.config.ServiceAccountBase64 != "" {
serviceAccountJSON, err := base64.StdEncoding.DecodeString(e.config.ServiceAccountBase64)
if err != nil || len(serviceAccountJSON) == 0 {
return nil, errors.Wrap(err, "failed to decode base64 service account")
}
// overwrite ServiceAccountJSON with credentials from ServiceAccountBase64 value
e.config.ServiceAccountJSON = string(serviceAccountJSON)
}

return bigquery.NewClient(ctx, e.config.ProjectID, option.WithCredentialsJSON([]byte(e.config.ServiceAccountJSON)))
}

Expand Down
13 changes: 13 additions & 0 deletions plugins/extractors/bigquery/bigquery_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,17 @@ func TestInit(t *testing.T) {

assert.NotEqual(t, plugins.InvalidConfigError{}, err)
})
// Init must reject a service_account_base64 value that is not valid base64
// and report the decode failure in the returned error.
t.Run("should return error if service_account_base64 config is invalid", func(t *testing.T) {
extr := bigquery.New(utils.Logger)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
// Only project_id and the malformed base64 credential are supplied;
// service_account_json is left unset.
err := extr.Init(ctx, plugins.Config{
URNScope: "test-bigquery",
RawConfig: map[string]interface{}{
"project_id": "google-project-id",
"service_account_base64": "----", // invalid
}})

// The assertion matches on the message text, so the extractor's error
// must keep this exact phrase.
assert.ErrorContains(t, err, "failed to decode base64 service account")
})
}