From 171343d9aec904f1a5340355c265df195dfa0af6 Mon Sep 17 00:00:00 2001 From: winnie <91998347+gwenwindflower@users.noreply.github.com> Date: Tue, 4 Jun 2024 11:55:12 -0500 Subject: [PATCH 01/28] Update Changelog, temporarily remove Case Sensitivity testing (#174) * Update changelog * Fix typo in CHANGELOG * Attempt to fix integration test case sensitivity for redshift * Always lower on Redshift * Fix indentation * Fix whitespace * Fix whitespace pt 2 * Update case sensitive seeds into folder * Use + for quote_columns config * Lower schema on redshift * Use target.type * Do some nonsense to make this work for Redshift * Move seeds config to properties.yml * Bypass redshift completely * Temporarily bypass Redshift in CI completely * Turn Redshift CI back on * Delete case sensitivity test * Delete case sensitive seed * Delete properties.yml for case sensitive seeds --- CHANGELOG.md | 126 +++++++++++++----- README.md | 9 +- integration_tests/dbt_project.yml | 6 +- .../seeds/data__Case_Sensitive.csv | 3 - .../test_generate_source_case_sensitive.sql | 35 ----- macros/generate_source.sql | 2 +- 6 files changed, 100 insertions(+), 81 deletions(-) delete mode 100644 integration_tests/seeds/data__Case_Sensitive.csv delete mode 100644 integration_tests/tests/test_generate_source_case_sensitive.sql diff --git a/CHANGELOG.md b/CHANGELOG.md index c223dd6..8f8658e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,32 @@ -## New features -- `generate_model_yaml` with `upstream_descriptions=True` now reads from upstream sources in addition to models. 
+# dbt-codegen v0.12.0-b1 + +## What's Changed + +### Features + +- `generate_model_yaml` with `upstream_descriptions=True` now reads from upstream sources in addition to models.(#112)[https://github.com/dbt-labs/dbt-codegen/issues/112] +- `generate_source` now has options for case sensitivity in all fields ([#112](https://github.com/dbt-labs/dbt-codegen/issues/112) + +### Fixes -## Fixes - Column `description` fields are now correctly escaped in `generate_model_yaml` ([#142](https://github.com/dbt-labs/dbt-codegen/issues/142)) -- Fix `generate_source` behavior of applying a lowercase function to all object names ([#112](https://github.com/dbt-labs/dbt-codegen/issues/112)) + +### Docs + +- Fixed `generate_source` documentation +- Rewrote the contributor README at `integration_tests/README.md` + +## New Contributors + +- @wircho made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/159 +- @yatsky made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/164 +- @pnadolny made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/168 +- @esegal made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/154 +- @gwenwindflower made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/163 + +**Full Changelog**: https://github.com/dbt-labs/dbt-codegen/compare/0.11.0...0.12.0-b1 + +## New features # dbt-codegen v0.11.0 @@ -13,6 +36,7 @@ and are lowercase to align with the dbt style guide. Scale & precision are **not** included. Previous logic for `generate_source` defaulted to `false` and the resulting data types were uppercase and included scale & precision ([#122](https://github.com/dbt-labs/dbt-codegen/pull/122)). [Dispatch](https://docs.getdbt.com/reference/dbt-jinja-functions/dispatch) can be used to utilize the column data type formatting of previous versions. 
Namely, by adding this macro to your project: + ```sql {% macro default__data_type_format_source(column) %} {{ return(column.data_type | upper) }} @@ -20,43 +44,48 @@ and are lowercase to align with the dbt style guide. Scale & precision are **not ``` And then adding this within `dbt_project.yml`: + ```yaml dispatch: - macro_namespace: codegen - search_order: ['my_project', 'codegen'] + search_order: ["my_project", "codegen"] ``` ## What's Changed -* GitHub Action to add/remove triage labels as-needed by @dbeatty10 in https://github.com/dbt-labs/dbt-codegen/pull/133 -* GitHub Action to close issues as stale as-needed by @dbeatty10 in https://github.com/dbt-labs/dbt-codegen/pull/134 -* Update README.md by @cohms in https://github.com/dbt-labs/dbt-codegen/pull/129 -* Remove hard-coded values for database and schema by @dbeatty10 in https://github.com/dbt-labs/dbt-codegen/pull/139 -* Instructions for the release process by @dbeatty10 in https://github.com/dbt-labs/dbt-codegen/pull/137 -* Add `include_data_types` argument to `generate_model_yaml` macro by @linbug in https://github.com/dbt-labs/dbt-codegen/pull/122 + +- GitHub Action to add/remove triage labels as-needed by @dbeatty10 in https://github.com/dbt-labs/dbt-codegen/pull/133 +- GitHub Action to close issues as stale as-needed by @dbeatty10 in https://github.com/dbt-labs/dbt-codegen/pull/134 +- Update README.md by @cohms in https://github.com/dbt-labs/dbt-codegen/pull/129 +- Remove hard-coded values for database and schema by @dbeatty10 in https://github.com/dbt-labs/dbt-codegen/pull/139 +- Instructions for the release process by @dbeatty10 in https://github.com/dbt-labs/dbt-codegen/pull/137 +- Add `include_data_types` argument to `generate_model_yaml` macro by @linbug in https://github.com/dbt-labs/dbt-codegen/pull/122 ## New Contributors -* @cohms made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/129 -* @linbug made their first contribution in 
https://github.com/dbt-labs/dbt-codegen/pull/122 + +- @cohms made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/129 +- @linbug made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/122 **Full Changelog**: https://github.com/dbt-labs/dbt-codegen/compare/0.10.0...v0.10.0 # dbt-codegen v0.10.0 ## What's Changed -* added comments to verbose regex in generate_model_import_ctes by @graciegoheen in https://github.com/dbt-labs/dbt-codegen/pull/93 -* Feature/hackathon model generator by @fivetran-joemarkiewicz in https://github.com/dbt-labs/dbt-codegen/pull/83 -* Suggestion to include packages.yml example in README.md by @Maayan-s in https://github.com/dbt-labs/dbt-codegen/pull/77 -* Add include_data_types flag to generate_source macro by @GSokol in https://github.com/dbt-labs/dbt-codegen/pull/76 -* Expected result of nested struct in BigQuery by @dbeatty10 in https://github.com/dbt-labs/dbt-codegen/pull/105 -* issue106/get_models helper macro by @erkanncelen in https://github.com/dbt-labs/dbt-codegen/pull/115 -* Feat/generate sources add database and schema by @jeremyholtzman in https://github.com/dbt-labs/dbt-codegen/pull/124 + +- added comments to verbose regex in generate_model_import_ctes by @graciegoheen in https://github.com/dbt-labs/dbt-codegen/pull/93 +- Feature/hackathon model generator by @fivetran-joemarkiewicz in https://github.com/dbt-labs/dbt-codegen/pull/83 +- Suggestion to include packages.yml example in README.md by @Maayan-s in https://github.com/dbt-labs/dbt-codegen/pull/77 +- Add include_data_types flag to generate_source macro by @GSokol in https://github.com/dbt-labs/dbt-codegen/pull/76 +- Expected result of nested struct in BigQuery by @dbeatty10 in https://github.com/dbt-labs/dbt-codegen/pull/105 +- issue106/get_models helper macro by @erkanncelen in https://github.com/dbt-labs/dbt-codegen/pull/115 +- Feat/generate sources add database and schema by @jeremyholtzman in 
https://github.com/dbt-labs/dbt-codegen/pull/124 ## New Contributors -* @fivetran-joemarkiewicz made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/83 -* @Maayan-s made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/77 -* @GSokol made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/76 -* @erkanncelen made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/115 -* @jeremyholtzman made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/124 + +- @fivetran-joemarkiewicz made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/83 +- @Maayan-s made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/77 +- @GSokol made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/76 +- @erkanncelen made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/115 +- @jeremyholtzman made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/124 **Full Changelog**: https://github.com/dbt-labs/dbt-codegen/compare/0.9.0...0.10.0 @@ -67,30 +96,40 @@ dispatch: # dbt-codegen v0.8.0 # Unreleased + ## Breaking changes + ## New features + ## Quality of life + - Now uses `print` instead of `log` to output the generated text into the console. This enables you to invoke dbt with the `--quiet` flag and directly pipe the codegen output into a new file, ending up with valid yaml ## Under the hood + ## Contributors: + - [@JorgenG](https://github.com/JorgenG) (#86) # dbt-codegen v0.7.0 ## 🚨 Breaking change + - Add support for including description placeholders for the source and table, which changes the behavior of `generate_source` when `include_descriptions` is set to `True`. 
Previous logic only created description placeholders for the columns ([#64](https://github.com/dbt-labs/dbt-codegen/issues/64), [#66](https://github.com/dbt-labs/dbt-codegen/pull/66)) ## New features + - Add optional `table_names` arg to `generate_source` ([#50](https://github.com/dbt-labs/dbt-codegen/issues/50), [#51](https://github.com/dbt-labs/dbt-codegen/pull/51)) - Add support for importing descriptions from columns with the same names in upstream models. It is available by setting the parameter `upstream_descriptions` to `True` in `generate_model_yaml` ([#61](https://github.com/dbt-labs/dbt-codegen/pull/61)) - Added `case_sensitive_cols` argument to `generate_base_model` macro ([#63](https://github.com/dbt-labs/dbt-codegen/pull/63)) - Add optional `name` arg to `generate_source` ([#64](https://github.com/dbt-labs/dbt-codegen/issues/64), [#66](https://github.com/dbt-labs/dbt-codegen/pull/66)) ## Fixes + - `generate_model_yaml` now correctly handles nested `STRUCT` fields in BigQuery ([#27](https://github.com/dbt-labs/dbt-codegen/issues/27), [#54](https://github.com/dbt-labs/dbt-codegen/pull/54)) ## Contributors: + - [@rahulj51](https://github.com/rahulj51) (#51) - [@bodschut](https://github.com/bodschut) (#54) - [@b-per](https://github.com/b-per) (#61) @@ -102,6 +141,7 @@ dispatch: This release creates breaking changes to the `generate_source.sql` macro. ## Features + - add optional `table_pattern` argument to `generate_source.sql` macro. Default value is '%' to pull all tables in the raw data schema to preserve existing behavior if the `table_pattern` argument is not specified by the user. # dbt-codegen v0.5.0 @@ -109,6 +149,7 @@ This release creates breaking changes to the `generate_source.sql` macro. This release supports any version (minor and patch) of v1, which means far less need for compatibility releases in the future. 
## Under the hood + - Change `require-dbt-version` to `[">=1.0.0", "<2.0.0"]` - Bump dbt-utils dependency - Replace `source-paths` and `data-paths` with `model-paths` and `seed-paths` respectively @@ -116,22 +157,27 @@ This release supports any version (minor and patch) of v1, which means far less - Replace `dbt_modules` with `dbt_packages` in `clean-targets` # dbt-codegen v0.4.1 + 🚨 This is a compatibility release in preparation for `dbt-core` v1.0.0 (πŸŽ‰). Projects using this version with `dbt-core` v1.0.x can expect to see a deprecation warning. This will be resolved in the next minor release. # dbt-codegen v0.4.0 ## Breaking changes + - Requires `dbt>=0.20.0` and `dbt-utils>=0.7.0` - Depends on `dbt-labs/dbt_utils` (instead of `fishtown-analytics/dbt_utils`) ## Features + - Add optional `leading_commas` arg to `generate_base_model` (#41 @jaypeedevlin) - Add optional `include_descriptions` arg to `generate_source` (#40 @djbelknapdbs) ## Fixes + - In the `generate_source` macro, use `dbt_utils.get_relations_by_pattern` instead of `get_relations_by_prefix`, since the latter will be deprecated in the future (#42) ## Under the hood + - Use new adapter.dispatch syntax (#44) # dbt-codegen v0.3.2 @@ -139,57 +185,73 @@ This release supports any version (minor and patch) of v1, which means far less This is a quality of life release ## Other -* Fix rendering issues on hub.getdbt.com -* Fix integration tests due to python version compatibility + +- Fix rendering issues on hub.getdbt.com +- Fix integration tests due to python version compatibility # dbt-codegen v0.3.1 + This is a bugfix release ## Fixes + - Use latest version of dbt-utils (0.6.2) to ensure generate_source_yaml works for non-target schemata (#34) # dbt-codegen v0.3.0 -## 🚨 Breaking change + +## 🚨 Breaking change + This release requires dbt v0.18.0, and dbt-utils v0.6.1. If you're not ready to upgrade, consider using a previous release of this package. 
## Quality of life + - Use dbt v0.18.0 (#31) -- Fix README rendering on hub (#32 @calvingiles) +- Fix README rendering on hub (#32 @calvingiles) # dbt-codegen v0.2.0 + ## 🚨 Breaking change + The lower bound of `dbt-utils` is now `0.4.0`. This won't affect most users, since you're likely already using version of dbt-utils higher than this to achieve 0.17.0 compatibility. ## Quality of life: + - Change dbt-utils dependencies to `[>=0.4.0, <0.6.0]` (#29) - Fix tests (#29) # dbt-codegen v0.1.0 + ## 🚨 Breaking change! This package now requires dbt v0.17.x! ## Features: -* Add `generate_model_yaml` (#18 @jtalmi) +- Add `generate_model_yaml` (#18 @jtalmi) ## Under the hood: -* Update to v0.17.0, including `dbt_project.yml` version 2 syntax (#23) -* Add GitHub templates and installation instructions (#23) + +- Update to v0.17.0, including `dbt_project.yml` version 2 syntax (#23) +- Add GitHub templates and installation instructions (#23) ## Acknowledgements + @marzaccaro made a PR for `generate_model_yaml`, and, although I had reviewed it, I let the PR go stale and somehow completely forgot about it when merging PR #18 β€” this is completely my bad! So equal credit to @marzaccaro and @jtalmi for their work :clap: # dbt-codegen v0.0.4 + This is a bugfix release to improve compatibility with Snowflake # dbt-codegen v0.0.3 + Bump utils version range # dbt-codegen v0.0.2 + Small quality of life improvements # dbt-codegen v0.0.1 + Initial release diff --git a/README.md b/README.md index 29066e4..ae1d8e0 100644 --- a/README.md +++ b/README.md @@ -70,14 +70,13 @@ which you can then paste into a schema file. 
- `include_schema` (optional, default=False): Whether you want to add the schema to your source definition - `case_sensitive_databases` (optional, default=False): Whether you want database names to be -in lowercase, or to match the case in the source table + in lowercase, or to match the case in the source table — not compatible with Redshift - `case_sensitive_schemas` (optional, default=False): Whether you want schema names to be -in lowercase, or to match the case in the source table + in lowercase, or to match the case in the source table — not compatible with Redshift - `case_sensitive_tables` (optional, default=False): Whether you want table names to be -in lowercase, or to match the case in the source table + in lowercase, or to match the case in the source table — not compatible with Redshift - `case_sensitive_cols` (optional, default=False): Whether you want column names to be -in lowercase, or to match the case in the source table - + in lowercase, or to match the case in the source table ### Outputting to a file diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml index 166f04e..6f194d0 100644 --- a/integration_tests/dbt_project.yml +++ b/integration_tests/dbt_project.yml @@ -18,13 +18,9 @@ clean-targets: seeds: +schema: raw_data +quote_columns: false - codegen_integration_tests: - data__Case_Sensitive: - +schema: Raw_Data_Case_Sensitive - quote_columns: true + vars: my_table_reference: table_c models: +bind: false - diff --git a/integration_tests/seeds/data__Case_Sensitive.csv b/integration_tests/seeds/data__Case_Sensitive.csv deleted file mode 100644 index d0f2e97..0000000 --- a/integration_tests/seeds/data__Case_Sensitive.csv +++ /dev/null @@ -1,3 +0,0 @@ -Col_A,Col_B -1,a -2,b diff --git a/integration_tests/tests/test_generate_source_case_sensitive.sql b/integration_tests/tests/test_generate_source_case_sensitive.sql deleted file mode 100644 index 48f3d69..0000000 --- 
a/integration_tests/tests/test_generate_source_case_sensitive.sql +++ /dev/null @@ -1,35 +0,0 @@ - -{% set raw_schema = generate_schema_name('Raw_Data_Case_Sensitive') %} - --- test default args -{% set actual_source_yaml = codegen.generate_source( - schema_name=raw_schema, - database_name=target.database, - generate_columns=True, - name=raw_schema, - include_database=True, - include_schema=True, - case_sensitive_databases=True, - case_sensitive_schemas=True, - case_sensitive_tables=True, - case_sensitive_cols=True -) %} - -{% set expected_source_yaml %} -version: 2 - -sources: - - name: codegen_integration_tests_postgres_raw_data_case_sensitive - database: circle_test - schema: codegen_integration_tests_postgres_Raw_Data_Case_Sensitive - tables: - - name: data__Case_Sensitive - columns: - - name: Col_A - data_type: integer - - name: Col_B - data_type: text -{% endset %} - - -{{ assert_equal (actual_source_yaml | trim, expected_source_yaml | trim) }} diff --git a/macros/generate_source.sql b/macros/generate_source.sql index f769911..b3790f2 100644 --- a/macros/generate_source.sql +++ b/macros/generate_source.sql @@ -1,5 +1,5 @@ {% macro get_tables_in_schema(schema_name, database_name=target.database, table_pattern='%', exclude='') %} - + {% set tables=dbt_utils.get_relations_by_pattern( schema_pattern=schema_name, database=database_name, From e532bd43803553270071af7e803334de9f63d702 Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Fri, 18 Oct 2024 13:25:27 -0500 Subject: [PATCH 02/28] Update testing structure for dbt Labs testing support - postgres (#181) * add tox * add postgres/redshift/bq * add more wh, update var names * fix profiles key * move supported adapters * Add CI workflow * update to allow circleci to keep working * fix BQ var name * remove config from profile * move to just support postgres * fix vars * use premade workflow * add newline * use merged version * add comments about future adapters * use tag * update readme * Add lines * reowkr profile 
so it can be reused * add sample env files and fix comment * Update .circleci/config.yml Co-authored-by: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> * let circleCI set the schema * fix readme instructions * undo non-postgres changes --------- Co-authored-by: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> --- .circleci/config.yml | 13 +++++---- .github/workflows/ci.yml | 33 ++++++++++++++++++++++ Makefile | 5 ++++ integration_tests/.env/bigquery.env | 3 ++ integration_tests/.env/postgres.env | 6 ++++ integration_tests/.env/redshift.env | 5 ++++ integration_tests/.env/snowflake.env | 6 ++++ integration_tests/README.md | 42 ++++++++-------------------- integration_tests/dbt_project.yml | 4 +++ integration_tests/profiles.yml | 25 ++++++++--------- supported_adapters.env | 1 + tox.ini | 27 ++++++++++++++++++ 12 files changed, 119 insertions(+), 51 deletions(-) create mode 100644 .github/workflows/ci.yml create mode 100644 integration_tests/.env/bigquery.env create mode 100644 integration_tests/.env/postgres.env create mode 100644 integration_tests/.env/redshift.env create mode 100644 integration_tests/.env/snowflake.env create mode 100644 supported_adapters.env create mode 100644 tox.ini diff --git a/.circleci/config.yml b/.circleci/config.yml index 2585c60..6746196 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -10,7 +10,7 @@ jobs: - checkout - run: - run: setup_creds + name: setup_creds command: | echo $BIGQUERY_SERVICE_ACCOUNT_JSON > ${HOME}/bigquery-service-key.json @@ -29,11 +29,12 @@ jobs: - run: name: "Run Tests - Postgres" environment: - POSTGRES_TEST_HOST: localhost - POSTGRES_TEST_USER: root - POSTGRES_TEST_PASS: "" - POSTGRES_TEST_PORT: 5432 - POSTGRES_TEST_DBNAME: circle_test + POSTGRES_HOST: localhost + POSTGRES_USER: root + DBT_ENV_SECRET_POSTGRES_PASS: "" + POSTGRES_PORT: 5432 + POSTGRES_DATABASE: circle_test + POSTGRES_SCHEMA: codegen_integration_tests_postgres command: | . 
dbt_venv/bin/activate cd integration_tests diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..6a8d288 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,33 @@ +# **what?** +# Run tests for dbt-codegen against supported adapters + +# **why?** +# To ensure that dbt-codegen works as expected with all supported adapters + +# **when?** +# On every PR, and every push to main and when manually triggered + +name: Package Integration Tests + +on: + push: + branches: + - main + pull_request: + workflow_dispatch: + +jobs: + run-tests: + uses: dbt-labs/dbt-package-testing/.github/workflows/run_tox.yml@v1 + # this just tests with postgres so no variables need to be passed through. + # When it's time to add more adapters you will need to pass through inputs for + # the other adapters as shown in the below example for redshift + # with: + # # redshift + # REDSHIFT_HOST: ${{ vars.REDSHIFT_HOST }} + # REDSHIFT_USER: ${{ vars.REDSHIFT_USER }} + # REDSHIFT_DATABASE: ${{ vars.REDSHIFT_DATABASE }} + # REDSHIFT_SCHEMA: "integration_tests_redshift_${{ github.run_number }}" + # REDSHIFT_PORT: ${{ vars.REDSHIFT_PORT }} + # secrets: + # DBT_ENV_SECRET_REDSHIFT_PASS: ${{ secrets.DBT_ENV_SECRET_REDSHIFT_PASS }} diff --git a/Makefile b/Makefile index 9472484..e3545fc 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,11 @@ test: ## Run the integration tests. @./run_test.sh $(target) +.PHONY: test_tox +test: ## Run the integration tests with tox + @\ + tox -e dbt_integration_$(target) + .PHONY: dev dev: ## Installs dbt-* packages in develop mode along with development dependencies. 
@\ diff --git a/integration_tests/.env/bigquery.env b/integration_tests/.env/bigquery.env new file mode 100644 index 0000000..4972fca --- /dev/null +++ b/integration_tests/.env/bigquery.env @@ -0,0 +1,3 @@ +BIGQUERY_PROJECT= +BIGQUERY_SCHEMA= +BIGQUERY_KEYFILE_JSON= diff --git a/integration_tests/.env/postgres.env b/integration_tests/.env/postgres.env new file mode 100644 index 0000000..4fd155c --- /dev/null +++ b/integration_tests/.env/postgres.env @@ -0,0 +1,6 @@ +POSTGRES_HOST=localhost +POSTGRES_USER=root +DBT_ENV_SECRET_POSTGRES_PASS=password +POSTGRES_PORT=5432 +POSTGRES_DATABASE=codegen_test +POSTGRES_SCHEMA=codegen_integration_tests_postgres diff --git a/integration_tests/.env/redshift.env b/integration_tests/.env/redshift.env new file mode 100644 index 0000000..baf768c --- /dev/null +++ b/integration_tests/.env/redshift.env @@ -0,0 +1,5 @@ +REDSHIFT_TEST_HOST= +REDSHIFT_TEST_USER= +REDSHIFT_TEST_PASS= +REDSHIFT_TEST_DBNAME= +REDSHIFT_TEST_PORT= diff --git a/integration_tests/.env/snowflake.env b/integration_tests/.env/snowflake.env new file mode 100644 index 0000000..eb10da9 --- /dev/null +++ b/integration_tests/.env/snowflake.env @@ -0,0 +1,6 @@ +SNOWFLAKE_TEST_ACCOUNT= +SNOWFLAKE_TEST_USER= +SNOWFLAKE_TEST_PASSWORD= +SNOWFLAKE_TEST_ROLE= +SNOWFLAKE_TEST_DATABASE= +SNOWFLAKE_TEST_WAREHOUSE= diff --git a/integration_tests/README.md b/integration_tests/README.md index 1337e57..ed9c863 100644 --- a/integration_tests/README.md +++ b/integration_tests/README.md @@ -44,35 +44,7 @@ You can set these env vars in a couple ways: - **Temporary**: Set these environment variables in your shell before running the tests. This is the easiest way to get started, but you'll have to set them every time you open a new terminal. - **Reusable**: If you anticipate developing for multiple sessions, set these environment variables in your shell profile (like `~/.bashrc` or `~/.zshrc`). This way, you won't have to set them every time you open a new terminal. 
-The environment variables you'll need to set for each adapter are: - -```bash -# Postgres — these are the defaults for the Docker container so actually have values -export POSTGRES_TEST_HOST=localhost -export POSTGRES_TEST_USER=root -export POSTGRES_TEST_PASS='' -export POSTGRES_TEST_PORT=5432 -export POSTGRES_TEST_DBNAME=circle_test - -# BigQuery -export BIGQUERY_SERVICE_KEY_PATH= -export BIGQUERY_TEST_DATABASE= - -# Redshift -export REDSHIFT_TEST_HOST= -export REDSHIFT_TEST_USER= -export REDSHIFT_TEST_PASS= -export REDSHIFT_TEST_DBNAME= -export REDSHIFT_TEST_PORT= - -# Snowflake -export SNOWFLAKE_TEST_ACCOUNT= -export SNOWFLAKE_TEST_USER= -export SNOWFLAKE_TEST_PASSWORD= -export SNOWFLAKE_TEST_ROLE= -export SNOWFLAKE_TEST_DATABASE= -export SNOWFLAKE_TEST_WAREHOUSE= -``` +The environment variables you'll need to set for each adapter can be found in [integration_tests/.env/](integration_tests/.env/). ### Setup Postgres or other database targets @@ -139,7 +111,9 @@ source .venv/bin/activate ## Write or modify an integration test -### Run the integration tests +Run all the tests _before_ you start developing to make sure everything is working as expected before you start making changes. Nothing is worse than spending a ton of time troubleshooting a failing test, only to realize it was failing before you touched anything. This will also ensure that you have the correct environment variables set up and that your database is running. + +### Run the Circle CI integration tests To run all the integration tests on your local machine like they will get run in CI: @@ -157,7 +131,13 @@ make test target=postgres ./run_test.sh postgres ``` -Run all the tests _before_ you start developing to make sure everything is working as expected before you start making changes. Nothing is worse than spending a ton of time troubleshooting a failing test, only to realize it was failing before you touched anything. 
This will also ensure that you have the correct environment variables set up and that your database is running. +### Run the tox Supported Tests + +To run all the integration tests on your local machine like they will get run in the CI (using GitHub workflows with tox): + +```shell +make test_tox target=postgres +``` ### Creating a new integration test diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml index 6f194d0..7d0c8f2 100644 --- a/integration_tests/dbt_project.yml +++ b/integration_tests/dbt_project.yml @@ -15,6 +15,10 @@ clean-targets: - "target" - "dbt_packages" +flags: + send_anonymous_usage_stats: False + use_colors: True + seeds: +schema: raw_data +quote_columns: false diff --git a/integration_tests/profiles.yml b/integration_tests/profiles.yml index ffe7a71..0db3973 100644 --- a/integration_tests/profiles.yml +++ b/integration_tests/profiles.yml @@ -1,23 +1,20 @@ -# HEY! This file is used in the dbt-codegen integrations tests with CircleCI. -# You should __NEVER__ check credentials into version control. Thanks for reading :) - -config: - send_anonymous_usage_stats: False - use_colors: True +# HEY! This file is used in the dbt-codegen integrations tests with GitHub CI. +# You should __NEVER__ check credentials into version control. That's why we use environment variables everywhere. 
+# Thanks for reading :) integration_tests: target: postgres outputs: postgres: - type: postgres - host: "{{ env_var('POSTGRES_TEST_HOST') }}" - user: "{{ env_var('POSTGRES_TEST_USER') }}" - pass: "{{ env_var('POSTGRES_TEST_PASS') }}" - port: "{{ env_var('POSTGRES_TEST_PORT') | as_number }}" - dbname: "{{ env_var('POSTGRES_TEST_DBNAME') }}" - schema: codegen_integration_tests_postgres - threads: 1 + type: "postgres" + host: "{{ env_var('POSTGRES_HOST') }}" + user: "{{ env_var('POSTGRES_USER') }}" + pass: "{{ env_var('DBT_ENV_SECRET_POSTGRES_PASS') }}" + port: "{{ env_var('POSTGRES_PORT') | as_number }}" + dbname: "{{ env_var('POSTGRES_DATABASE') }}" + schema: "{{ env_var('POSTGRES_SCHEMA') }}" + threads: 5 redshift: type: redshift diff --git a/supported_adapters.env b/supported_adapters.env new file mode 100644 index 0000000..79c9a19 --- /dev/null +++ b/supported_adapters.env @@ -0,0 +1 @@ +SUPPORTED_ADAPTERS=postgres diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..9918e82 --- /dev/null +++ b/tox.ini @@ -0,0 +1,27 @@ +[tox] +skipsdist = True +envlist = lint_all, testenv + +[testenv] +passenv = + # postgres env vars + POSTGRES_HOST + POSTGRES_USER + DBT_ENV_SECRET_POSTGRES_PASS + POSTGRES_PORT + POSTGRES_DATABASE + POSTGRES_SCHEMA + +# Postgres integration tests for centralized dbt testing +# run dbt commands directly, assumes dbt is already installed in environment +[testenv:dbt_integration_postgres] +changedir = integration_tests +allowlist_externals = + dbt +skip_install = true +commands = + dbt --warn-error deps --target postgres + dbt --warn-error run-operation create_source_table --target postgres + dbt --warn-error seed --target postgres --full-refresh + dbt --warn-error run --target postgres + dbt --warn-error test --target postgres From 7acc8c0ebc80c3d220b9fbbc61fd5ed816cbdb8b Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Tue, 19 Nov 2024 16:54:01 -0700 Subject: [PATCH 03/28] Restore CI 
test for case-sensitive identifiers when generating sources (#192) * Restore case sensitivity test * Try to make integration test adapter-agnostic * Try to make integration test CI-agnostic and adapter-agnostic * Use adapter-agnostic data types * Temporarily hard-code case-insensitive seed name for Snowflake * Temporarily hard-code case-insensitive seed name for Snowflake --- integration_tests/dbt_project.yml | 4 +++ .../seeds/data__Case_Sensitive.csv | 3 ++ .../test_generate_source_case_sensitive.sql | 35 +++++++++++++++++++ 3 files changed, 42 insertions(+) create mode 100644 integration_tests/seeds/data__Case_Sensitive.csv create mode 100644 integration_tests/tests/test_generate_source_case_sensitive.sql diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml index 7d0c8f2..5e82ee7 100644 --- a/integration_tests/dbt_project.yml +++ b/integration_tests/dbt_project.yml @@ -22,6 +22,10 @@ flags: seeds: +schema: raw_data +quote_columns: false + codegen_integration_tests: + data__Case_Sensitive: + +schema: Raw_Data_Case_Sensitive + quote_columns: true vars: my_table_reference: table_c diff --git a/integration_tests/seeds/data__Case_Sensitive.csv b/integration_tests/seeds/data__Case_Sensitive.csv new file mode 100644 index 0000000..d0f2e97 --- /dev/null +++ b/integration_tests/seeds/data__Case_Sensitive.csv @@ -0,0 +1,3 @@ +Col_A,Col_B +1,a +2,b diff --git a/integration_tests/tests/test_generate_source_case_sensitive.sql b/integration_tests/tests/test_generate_source_case_sensitive.sql new file mode 100644 index 0000000..eac7169 --- /dev/null +++ b/integration_tests/tests/test_generate_source_case_sensitive.sql @@ -0,0 +1,35 @@ + +{% set raw_schema = generate_schema_name('Raw_Data_Case_Sensitive') %} + +-- test default args +{% set actual_source_yaml = codegen.generate_source( + schema_name=raw_schema, + database_name=target.database, + generate_columns=True, + name=raw_schema, + include_database=True, + include_schema=True, + 
case_sensitive_databases=True, + case_sensitive_schemas=True, + case_sensitive_tables=True, + case_sensitive_cols=True +) %} + +{% set expected_source_yaml %} +version: 2 + +sources: + - name: {{ raw_schema | lower }} + database: {{ target.database }} + schema: {{ raw_schema }} + tables: + - name: {% if target.type == "snowflake" %}DATA__CASE_SENSITIVE{% else %}data__Case_Sensitive{% endif %} + columns: + - name: Col_A + data_type: {{ integer_type_value() }} + - name: Col_B + data_type: {{ text_type_value() }} +{% endset %} + + +{{ assert_equal (actual_source_yaml | trim, expected_source_yaml | trim) }} From 4668e220e9f2feec6851bf32c9ff5adc5cd30345 Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Tue, 19 Nov 2024 17:41:24 -0700 Subject: [PATCH 04/28] Changlogs for 0.12.0, 0.12.1, and 0.13.0-b1 (#196) --- CHANGELOG.md | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f8658e..06cfa96 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,32 +1,54 @@ -# dbt-codegen v0.12.0-b1 +# dbt-codegen v0.13.0-b1 ## What's Changed ### Features -- `generate_model_yaml` with `upstream_descriptions=True` now reads from upstream sources in addition to models.(#112)[https://github.com/dbt-labs/dbt-codegen/issues/112] -- `generate_source` now has options for case sensitivity in all fields ([#112](https://github.com/dbt-labs/dbt-codegen/issues/112) +* Read upstream descriptions from sources by @esegal in https://github.com/dbt-labs/dbt-codegen/pull/154 +* Case sensitive generate source by @pnadolny13 in https://github.com/dbt-labs/dbt-codegen/pull/168 ### Fixes -- Column `description` fields are now correctly escaped in `generate_model_yaml` ([#142](https://github.com/dbt-labs/dbt-codegen/issues/142)) +* Escape upstream descriptions in generate_model_yaml by @wircho in https://github.com/dbt-labs/dbt-codegen/pull/159 ### Docs -- Fixed `generate_source` 
documentation -- Rewrote the contributor README at `integration_tests/README.md` +* fix generate_source example by @yatsky in https://github.com/dbt-labs/dbt-codegen/pull/164 +* Improve developer README by @gwenwindflower in https://github.com/dbt-labs/dbt-codegen/pull/163 +* Fix bad spacing in dev README by @gwenwindflower in https://github.com/dbt-labs/dbt-codegen/pull/170 +* Update Changelog by @gwenwindflower in https://github.com/dbt-labs/dbt-codegen/pull/174 ## New Contributors - @wircho made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/159 - @yatsky made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/164 -- @pnadolny made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/168 +- @pnadolny13 made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/168 - @esegal made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/154 - @gwenwindflower made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/163 -**Full Changelog**: https://github.com/dbt-labs/dbt-codegen/compare/0.11.0...0.12.0-b1 +**Full Changelog**: https://github.com/dbt-labs/dbt-codegen/compare/0.12.1...v0.13.0-b1 -## New features +# dbt-codegen v0.12.1 + +## What's Changed +* Add dispatch to macros by @jeremyyeo in https://github.com/dbt-labs/dbt-codegen/pull/148 +* Remove terminal output in the generated file. 
by @vijmen in https://github.com/dbt-labs/dbt-codegen/pull/149 + +## New Contributors +* @jeremyyeo made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/148 +* @vijmen made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/149 + +**Full Changelog**: https://github.com/dbt-labs/dbt-codegen/compare/0.12.0...0.12.1 + +# dbt-codegen v0.12.0 + +## What's Changed +* Use print for outputting codegen by @JorgenG in https://github.com/dbt-labs/dbt-codegen/pull/86 + +## New Contributors +* @JorgenG made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/86 + +**Full Changelog**: https://github.com/dbt-labs/dbt-codegen/compare/0.11.0...0.12.0 # dbt-codegen v0.11.0 From c959e4dead65050aa7a190f0b5b55f8eb9176724 Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Wed, 20 Nov 2024 14:28:43 -0600 Subject: [PATCH 05/28] add snowflake (#198) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add snowflake * fix profiles * check what’s installed * pass in snowflake password * and more env to gitignore --- .github/workflows/ci.yml | 19 ++++++++++--------- .gitignore | 2 +- integration_tests/profiles.yml | 16 ++++++++-------- supported_adapters.env | 2 +- tox.ini | 24 ++++++++++++++++++++++++ 5 files changed, 44 insertions(+), 19 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6a8d288..c7497bc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,12 +22,13 @@ jobs: # this just tests with postgres so no variables need to be passed through. 
# When it's time to add more adapters you will need to pass through inputs for # the other adapters as shown in the below example for redshift - # with: - # # redshift - # REDSHIFT_HOST: ${{ vars.REDSHIFT_HOST }} - # REDSHIFT_USER: ${{ vars.REDSHIFT_USER }} - # REDSHIFT_DATABASE: ${{ vars.REDSHIFT_DATABASE }} - # REDSHIFT_SCHEMA: "integration_tests_redshift_${{ github.run_number }}" - # REDSHIFT_PORT: ${{ vars.REDSHIFT_PORT }} - # secrets: - # DBT_ENV_SECRET_REDSHIFT_PASS: ${{ secrets.DBT_ENV_SECRET_REDSHIFT_PASS }} + with: + # snowflake + SNOWFLAKE_USER: ${{ vars.SNOWFLAKE_USER }} + SNOWFLAKE_ROLE: ${{ vars.SNOWFLAKE_ROLE }} + SNOWFLAKE_DATABASE: ${{ vars.SNOWFLAKE_DATABASE }} + SNOWFLAKE_WAREHOUSE: ${{ vars.SNOWFLAKE_WAREHOUSE }} + SNOWFLAKE_SCHEMA: "integration_tests_snowflake_${{ github.run_number }}" + secrets: + SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }} + DBT_ENV_SECRET_SNOWFLAKE_PASS: ${{ secrets.SNOWFLAKE_PASS }} diff --git a/.gitignore b/.gitignore index 1e302bd..252f27b 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,7 @@ target/ dbt_modules/ dbt_packages/ logs/ -env/ +env*/ .venv/ .env/ venv/ diff --git a/integration_tests/profiles.yml b/integration_tests/profiles.yml index 0db3973..f2ddf18 100644 --- a/integration_tests/profiles.yml +++ b/integration_tests/profiles.yml @@ -35,12 +35,12 @@ integration_tests: threads: 1 snowflake: - type: snowflake - account: "{{ env_var('SNOWFLAKE_TEST_ACCOUNT') }}" - user: "{{ env_var('SNOWFLAKE_TEST_USER') }}" - password: "{{ env_var('SNOWFLAKE_TEST_PASSWORD') }}" - role: "{{ env_var('SNOWFLAKE_TEST_ROLE') }}" - database: "{{ env_var('SNOWFLAKE_TEST_DATABASE') }}" - warehouse: "{{ env_var('SNOWFLAKE_TEST_WAREHOUSE') }}" - schema: codegen_integration_tests_snowflake + type: "snowflake" + account: "{{ env_var('SNOWFLAKE_ACCOUNT') }}" + user: "{{ env_var('SNOWFLAKE_USER') }}" + password: "{{ env_var('DBT_ENV_SECRET_SNOWFLAKE_PASS') }}" + role: "{{ env_var('SNOWFLAKE_ROLE') }}" + database: "{{ 
env_var('SNOWFLAKE_DATABASE') }}" + warehouse: "{{ env_var('SNOWFLAKE_WAREHOUSE') }}" + schema: "{{ env_var('SNOWFLAKE_SCHEMA') }}" threads: 1 diff --git a/supported_adapters.env b/supported_adapters.env index 79c9a19..8d18434 100644 --- a/supported_adapters.env +++ b/supported_adapters.env @@ -1 +1 @@ -SUPPORTED_ADAPTERS=postgres +SUPPORTED_ADAPTERS=postgres,snowflake diff --git a/tox.ini b/tox.ini index 9918e82..d10286b 100644 --- a/tox.ini +++ b/tox.ini @@ -11,6 +11,14 @@ passenv = POSTGRES_PORT POSTGRES_DATABASE POSTGRES_SCHEMA + # snowflake env vars + SNOWFLAKE_ACCOUNT + SNOWFLAKE_USER + DBT_ENV_SECRET_SNOWFLAKE_PASS + SNOWFLAKE_ROLE + SNOWFLAKE_DATABASE + SNOWFLAKE_WAREHOUSE + SNOWFLAKE_SCHEMA # Postgres integration tests for centralized dbt testing # run dbt commands directly, assumes dbt is already installed in environment @@ -20,8 +28,24 @@ allowlist_externals = dbt skip_install = true commands = + dbt --version dbt --warn-error deps --target postgres dbt --warn-error run-operation create_source_table --target postgres dbt --warn-error seed --target postgres --full-refresh dbt --warn-error run --target postgres dbt --warn-error test --target postgres + +# snowflake integration tests for centralized dbt testing +# run dbt commands directly, assumes dbt is already installed in environment +[testenv:dbt_integration_snowflake] +changedir = integration_tests +allowlist_externals = + dbt +skip_install = true +commands = + dbt --version + dbt --warn-error deps --target snowflake + dbt --warn-error run-operation create_source_table --target snowflake + dbt --warn-error seed --target snowflake --full-refresh + dbt --warn-error run --target snowflake + dbt --warn-error test --target snowflake From b444bf836132d316efa0c1286eb366f30c9b941f Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Wed, 20 Nov 2024 17:42:38 -0700 Subject: [PATCH 06/28] Fix quoted identifiers in the `generate_base_model` macro for BigQuery (#199) * 
Use `adapter.quote` to create a case-sensitive quoted identifier for column names * Force a failure for all adapters to help troubleshoot * Revert "Force a failure for all adapters to help troubleshoot" This reverts commit d707832d3e13d12e3619b248cc87f1c72447bafb. * Use `adapter.quote` to create a case-sensitive quoted identifier for column names in `generate_base_model` macro --- integration_tests/macros/operations/create_source_table.sql | 4 ++-- .../tests/test_generate_base_models_all_args.sql | 4 ++-- .../tests/test_generate_base_models_case_sensitive.sql | 4 ++-- macros/generate_base_model.sql | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/integration_tests/macros/operations/create_source_table.sql b/integration_tests/macros/operations/create_source_table.sql index 6794e51..8123f33 100644 --- a/integration_tests/macros/operations/create_source_table.sql +++ b/integration_tests/macros/operations/create_source_table.sql @@ -48,8 +48,8 @@ set enable_case_sensitive_identifier to true; {% set create_table_sql_case_sensitive %} create table {{ target_schema }}.codegen_integration_tests__data_source_table_case_sensitive as ( select - 1 as {% if target.type == "bigquery" %}My_Integer_Col{% else %}"My_Integer_Col"{% endif %}, - true as {% if target.type == "bigquery" %}My_Bool_Col{% else %}"My_Bool_Col"{% endif %} + 1 as {{ adapter.quote("My_Integer_Col") }}, + true as {{ adapter.quote("My_Bool_Col") }} ) {% endset %} diff --git a/integration_tests/tests/test_generate_base_models_all_args.sql b/integration_tests/tests/test_generate_base_models_all_args.sql index 5a0b21e..89497de 100644 --- a/integration_tests/tests/test_generate_base_models_all_args.sql +++ b/integration_tests/tests/test_generate_base_models_all_args.sql @@ -20,8 +20,8 @@ with source as ( renamed as ( select - {% if target.type == "bigquery" %}My_Integer_Col{% else %}"My_Integer_Col"{% endif %} - , {% if target.type == "bigquery" %}My_Bool_Col{% else %}"My_Bool_Col"{% endif %} + 
{{ adapter.quote("My_Integer_Col") }} + , {{ adapter.quote("My_Bool_Col") }} from source diff --git a/integration_tests/tests/test_generate_base_models_case_sensitive.sql b/integration_tests/tests/test_generate_base_models_case_sensitive.sql index 2fd3123..1f18a1c 100644 --- a/integration_tests/tests/test_generate_base_models_case_sensitive.sql +++ b/integration_tests/tests/test_generate_base_models_case_sensitive.sql @@ -16,8 +16,8 @@ with source as ( renamed as ( select - {% if target.type == "bigquery" %}My_Integer_Col{% else %}"My_Integer_Col"{% endif %}, - {% if target.type == "bigquery" %}My_Bool_Col{% else %}"My_Bool_Col"{% endif %} + {{ adapter.quote("My_Integer_Col") }}, + {{ adapter.quote("My_Bool_Col") }} from source diff --git a/macros/generate_base_model.sql b/macros/generate_base_model.sql index 0a58784..aa74145 100644 --- a/macros/generate_base_model.sql +++ b/macros/generate_base_model.sql @@ -25,11 +25,11 @@ renamed as ( select {%- if leading_commas -%} {%- for column in column_names %} - {{", " if not loop.first}}{% if not case_sensitive_cols %}{{ column | lower }}{% elif target.type == "bigquery" %}{{ column }}{% else %}{{ "\"" ~ column ~ "\"" }}{% endif %} + {{", " if not loop.first}}{% if not case_sensitive_cols %}{{ column | lower }}{% else %}{{ adapter.quote(column) }}{% endif %} {%- endfor %} {%- else -%} {%- for column in column_names %} - {% if not case_sensitive_cols %}{{ column | lower }}{% elif target.type == "bigquery" %}{{ column }}{% else %}{{ "\"" ~ column ~ "\"" }}{% endif %}{{"," if not loop.last}} + {% if not case_sensitive_cols %}{{ column | lower }}{% else %}{{ adapter.quote(column) }}{% endif %}{{"," if not loop.last}} {%- endfor -%} {%- endif %} From 696c9f01758d727f9c1a360fcbe5af7b5ab7fa1e Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Thu, 21 Nov 2024 08:04:23 -0700 Subject: [PATCH 07/28] Try removing Redshift-specific logic (#208) --- 
.../macros/operations/create_source_table.sql | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/integration_tests/macros/operations/create_source_table.sql b/integration_tests/macros/operations/create_source_table.sql index 8123f33..61d0814 100644 --- a/integration_tests/macros/operations/create_source_table.sql +++ b/integration_tests/macros/operations/create_source_table.sql @@ -1,12 +1,5 @@ {% macro create_source_table() %} -{% if target.type == "redshift" %} -{% set disable_case_sensitive %} -reset enable_case_sensitive_identifier; -{% endset %} -{{ run_query(disable_case_sensitive) }} -{% endif %} - {% set target_schema=api.Relation.create( database=target.database, schema="codegen_integration_tests__data_source_schema" @@ -38,13 +31,6 @@ drop table if exists {{ target_schema }}.codegen_integration_tests__data_source_ {{ run_query(drop_table_sql_case_sensitive) }} -{% if target.type == "redshift" %} -{% set enable_case_sensitive %} -set enable_case_sensitive_identifier to true; -{% endset %} -{{ run_query(enable_case_sensitive) }} -{% endif %} - {% set create_table_sql_case_sensitive %} create table {{ target_schema }}.codegen_integration_tests__data_source_table_case_sensitive as ( select From 5caf2e39bbc171a620049ada7336d125d8993bbf Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Thu, 21 Nov 2024 15:31:03 -0700 Subject: [PATCH 08/28] =?UTF-8?q?Use=20the=20`cimg/postgres`=20Docker=20im?= =?UTF-8?q?age=20created=20by=20CircleCI=20with=20continu=E2=80=A6=20(#214?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Use the `cimg/postgres` Docker image created by CircleCI with continuous integration builds in mind * Add the root Postgres user to the environment --- .circleci/config.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 6746196..cf6f1a3 100644 --- 
a/.circleci/config.yml +++ b/.circleci/config.yml @@ -4,8 +4,9 @@ jobs: build: docker: - image: cimg/python:3.9.9 - - image: circleci/postgres:9.6.5-alpine-ram - + - image: cimg/postgres:9.6 + environment: + POSTGRES_USER: root steps: - checkout From 6388e32c4629d7ff8a31dc0a1b90419937c208cf Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Thu, 21 Nov 2024 17:01:17 -0700 Subject: [PATCH 09/28] Independent workflow job for dbt-postgres (#215) * Independent workflow job for dbt-postgres * Remove activation of virtual environment * Try without `python -m` * Independent workflow job for dbt-redshift * Independent workflow job for dbt-snowflake * Independent workflow job for dbt-snowflake * Independent workflow job for dbt-bigquery * Independent workflow job for dbt-bigquery * Independent workflow job for dbt-bigquery * Independent workflow job for dbt-bigquery * Independent workflow job for dbt-bigquery * Setup environment variables for dbt-bigquery --- .circleci/config.yml | 123 ++++++++++++++++++++++++++----------------- 1 file changed, 76 insertions(+), 47 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index cf6f1a3..acc10fd 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,100 +1,129 @@ version: 2 jobs: - build: + + integration-postgres: docker: - - image: cimg/python:3.9.9 + - image: cimg/python:3.9 - image: cimg/postgres:9.6 environment: POSTGRES_USER: root + environment: + POSTGRES_HOST: localhost + POSTGRES_USER: root + DBT_ENV_SECRET_POSTGRES_PASS: '' + POSTGRES_PORT: 5432 + POSTGRES_DATABASE: circle_test + POSTGRES_SCHEMA: codegen_integration_tests_postgres + steps: - checkout - - - run: - name: setup_creds - command: | - echo $BIGQUERY_SERVICE_ACCOUNT_JSON > ${HOME}/bigquery-service-key.json - - - restore_cache: - key: deps1-{{ .Branch }} - - - run: - name: "Setup dbt" - command: | - python3 -m venv dbt_venv - . 
dbt_venv/bin/activate - - python -m pip install --upgrade pip setuptools - python -m pip install --pre dbt-core dbt-postgres dbt-redshift dbt-snowflake dbt-bigquery - + - run: pip install --pre dbt-core dbt-postgres - run: name: "Run Tests - Postgres" - environment: - POSTGRES_HOST: localhost - POSTGRES_USER: root - DBT_ENV_SECRET_POSTGRES_PASS: "" - POSTGRES_PORT: 5432 - POSTGRES_DATABASE: circle_test - POSTGRES_SCHEMA: codegen_integration_tests_postgres command: | - . dbt_venv/bin/activate cd integration_tests dbt --warn-error deps --target postgres dbt --warn-error run-operation create_source_table --target postgres dbt --warn-error seed --target postgres --full-refresh dbt --warn-error run --target postgres dbt --warn-error test --target postgres + - store_artifacts: + path: integration_tests/logs + - store_artifacts: + path: integration_tests/target + + # The resource_class feature allows configuring CPU and RAM resources for each job. Different resource classes are available for different executors. https://circleci.com/docs/2.0/configuration-reference/#resourceclass + resource_class: large + integration-redshift: + docker: + - image: cimg/python:3.9 + steps: + - checkout + - run: pip install --pre dbt-core dbt-redshift - run: name: "Run Tests - Redshift" command: | - . dbt_venv/bin/activate - echo `pwd` cd integration_tests dbt --warn-error deps --target redshift dbt --warn-error run-operation create_source_table --target redshift dbt --warn-error seed --target redshift --full-refresh dbt --warn-error run --target redshift dbt --warn-error test --target redshift + - store_artifacts: + path: integration_tests/logs + - store_artifacts: + path: integration_tests/target + # The resource_class feature allows configuring CPU and RAM resources for each job. Different resource classes are available for different executors. 
https://circleci.com/docs/2.0/configuration-reference/#resourceclass + resource_class: large + integration-snowflake: + docker: + - image: cimg/python:3.9 + steps: + - checkout + - run: pip install --pre dbt-core dbt-snowflake - run: name: "Run Tests - Snowflake" command: | - . dbt_venv/bin/activate - echo `pwd` cd integration_tests dbt --warn-error deps --target snowflake dbt --warn-error run-operation create_source_table --target snowflake dbt --warn-error seed --target snowflake --full-refresh dbt --warn-error run --target snowflake dbt --warn-error test --target snowflake + - store_artifacts: + path: integration_tests/logs + - store_artifacts: + path: integration_tests/target + # The resource_class feature allows configuring CPU and RAM resources for each job. Different resource classes are available for different executors. https://circleci.com/docs/2.0/configuration-reference/#resourceclass + resource_class: large + integration-bigquery: + environment: + BIGQUERY_SERVICE_KEY_PATH: "/home/circleci/bigquery-service-key.json" + docker: + - image: cimg/python:3.9 + steps: + - checkout + - run: pip install --pre dbt-core dbt-bigquery + - run: + name: Setup Environment Variables + command: | + echo $BIGQUERY_SERVICE_ACCOUNT_JSON > ${HOME}/bigquery-service-key.json + echo 'export BIGQUERY_KEYFILE_JSON="$BIGQUERY_SERVICE_ACCOUNT_JSON"' >> "$BASH_ENV" - run: name: "Run Tests - BigQuery" - environment: - BIGQUERY_SERVICE_KEY_PATH: "/home/circleci/bigquery-service-key.json" - command: | - . 
dbt_venv/bin/activate - echo `pwd` cd integration_tests dbt --warn-error deps --target bigquery dbt --warn-error run-operation create_source_table --target bigquery dbt --warn-error seed --target bigquery --full-refresh dbt --warn-error run --target bigquery dbt --warn-error test --target bigquery - - - save_cache: - key: deps1-{{ .Branch }} - paths: - - "dbt_venv" + - store_artifacts: + path: integration_tests/logs + - store_artifacts: + path: integration_tests/target + # The resource_class feature allows configuring CPU and RAM resources for each job. Different resource classes are available for different executors. https://circleci.com/docs/2.0/configuration-reference/#resourceclass + resource_class: large workflows: version: 2 test-all: jobs: - - build: - context: - - profile-redshift - - profile-snowflake - - profile-bigquery + - integration-postgres: + context: profile-postgres + - integration-redshift: + context: profile-redshift + requires: + - integration-postgres + - integration-snowflake: + context: profile-snowflake + requires: + - integration-postgres + - integration-bigquery: + context: profile-bigquery + requires: + - integration-postgres From f41d1b9c23f9d9dad4ab05de74a3f0ed0a78a766 Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Thu, 21 Nov 2024 17:22:00 -0700 Subject: [PATCH 10/28] Simplify environment variables for BigQuery in CircleCI (#216) * Simplify environment variables for BigQuery in CircleCI * Fix YAML parsing error * Fix reference to environment variable * Fix reference to environment variable --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index acc10fd..e6ccb79 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -91,7 +91,7 @@ jobs: - run: name: Setup Environment Variables command: | - echo $BIGQUERY_SERVICE_ACCOUNT_JSON > ${HOME}/bigquery-service-key.json + echo 
$BIGQUERY_SERVICE_ACCOUNT_JSON > $BIGQUERY_SERVICE_KEY_PATH echo 'export BIGQUERY_KEYFILE_JSON="$BIGQUERY_SERVICE_ACCOUNT_JSON"' >> "$BASH_ENV" - run: name: "Run Tests - BigQuery" From 1cc3090cb75629274f1d4d80f3bbb9aca030e962 Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Fri, 22 Nov 2024 08:08:31 -0700 Subject: [PATCH 11/28] Stop installing prereleases from PyPI in favor of stable releases only (#220) --- .circleci/config.yml | 8 ++++---- Makefile | 2 +- integration_tests/README.md | 7 ++----- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index e6ccb79..d873aa4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -18,7 +18,7 @@ jobs: steps: - checkout - - run: pip install --pre dbt-core dbt-postgres + - run: pip install dbt-core dbt-postgres - run: name: "Run Tests - Postgres" command: | @@ -41,7 +41,7 @@ jobs: - image: cimg/python:3.9 steps: - checkout - - run: pip install --pre dbt-core dbt-redshift + - run: pip install dbt-core dbt-redshift - run: name: "Run Tests - Redshift" command: | @@ -63,7 +63,7 @@ jobs: - image: cimg/python:3.9 steps: - checkout - - run: pip install --pre dbt-core dbt-snowflake + - run: pip install dbt-core dbt-snowflake - run: name: "Run Tests - Snowflake" command: | @@ -87,7 +87,7 @@ jobs: - image: cimg/python:3.9 steps: - checkout - - run: pip install --pre dbt-core dbt-bigquery + - run: pip install dbt-core dbt-bigquery - run: name: Setup Environment Variables command: | diff --git a/Makefile b/Makefile index e3545fc..d842cb9 100644 --- a/Makefile +++ b/Makefile @@ -14,7 +14,7 @@ dev: ## Installs dbt-* packages in develop mode along with development dependenc @\ echo "Install dbt-$(target)..."; \ python -m pip install --upgrade pip setuptools; \ - python -m pip install --pre dbt-core "dbt-$(target)"; + python -m pip install dbt-core "dbt-$(target)"; .PHONY: setup-db setup-db: ## Setup Postgres database with 
docker-compose for system testing. diff --git a/integration_tests/README.md b/integration_tests/README.md index ed9c863..91f9ef0 100644 --- a/integration_tests/README.md +++ b/integration_tests/README.md @@ -89,7 +89,7 @@ Next, install `dbt-core` (and its dependencies) with: ```shell make dev target=[postgres|redshift|...] # or -python3 -m pip install --pre dbt-core dbt-[postgres|redshift|...] +python3 -m pip install dbt-core dbt-[postgres|redshift|...] ``` Or more specific: @@ -97,12 +97,9 @@ Or more specific: ```shell make dev target=postgres # or -python3 -m pip install --pre dbt-core dbt-postgres +python3 -m pip install dbt-core dbt-postgres ``` -> [!NOTE] -> The `--pre` flag tells pip to install the latest pre-release version of whatever you pass to install. This ensures you're always using the latest version of dbt, so if your code interacts with dbt in a way that causes issues or test failures, we'll know about it ahead of a release. - Make sure to reload your virtual environment after installing the dependencies: ```shell From fdc998c8d1bda197bcc3be9f85272a6bdc3f2622 Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Fri, 22 Nov 2024 08:38:52 -0700 Subject: [PATCH 12/28] Upgrade to Python 3.11 in CircleCI (#222) --- .circleci/config.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index d873aa4..348ba3f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -4,7 +4,7 @@ jobs: integration-postgres: docker: - - image: cimg/python:3.9 + - image: cimg/python:3.11 - image: cimg/postgres:9.6 environment: POSTGRES_USER: root @@ -38,7 +38,7 @@ jobs: integration-redshift: docker: - - image: cimg/python:3.9 + - image: cimg/python:3.11 steps: - checkout - run: pip install dbt-core dbt-redshift @@ -60,7 +60,7 @@ jobs: integration-snowflake: docker: - - image: cimg/python:3.9 + - image: cimg/python:3.11 steps: - checkout - run: pip install 
dbt-core dbt-snowflake @@ -84,7 +84,7 @@ jobs: environment: BIGQUERY_SERVICE_KEY_PATH: "/home/circleci/bigquery-service-key.json" docker: - - image: cimg/python:3.9 + - image: cimg/python:3.11 steps: - checkout - run: pip install dbt-core dbt-bigquery From b28ab7d729683441e66310b8d5f8c47674d8b8e4 Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Fri, 22 Nov 2024 14:18:44 -0700 Subject: [PATCH 13/28] Use dynamic schema names rather than hardcoded ones (#224) * Disable two CI tests * Use a dynamic schema name based off the target schema rather than a hardcoded one * Restore one of the CI tests * Try updating the expected output * Update expected model given upstream changes * Restore the other CI test * Update expected model given upstream changes --- .../macros/operations/create_source_table.sql | 2 +- .../models/model_without_import_ctes.sql | 2 +- integration_tests/models/source.yml | 1 + .../tests/test_generate_model_import_ctes.sql | 11 ++--------- .../tests/test_generate_model_import_ctes_leading.sql | 11 ++--------- 5 files changed, 7 insertions(+), 20 deletions(-) diff --git a/integration_tests/macros/operations/create_source_table.sql b/integration_tests/macros/operations/create_source_table.sql index 61d0814..9607d60 100644 --- a/integration_tests/macros/operations/create_source_table.sql +++ b/integration_tests/macros/operations/create_source_table.sql @@ -2,7 +2,7 @@ {% set target_schema=api.Relation.create( database=target.database, - schema="codegen_integration_tests__data_source_schema" + schema=target.schema ~ "__data_source_schema" ) %} diff --git a/integration_tests/models/model_without_import_ctes.sql b/integration_tests/models/model_without_import_ctes.sql index 94ace74..68a552f 100644 --- a/integration_tests/models/model_without_import_ctes.sql +++ b/integration_tests/models/model_without_import_ctes.sql @@ -20,7 +20,7 @@ with my_first_cte as ( my_second_cte as ( select 1 as id - from 
codegen_integration_tests__data_source_schema.codegen_integration_tests__data_source_table + from {{ target.schema }}__data_source_schema.codegen_integration_tests__data_source_table union all select 2 as id diff --git a/integration_tests/models/source.yml b/integration_tests/models/source.yml index 528139c..13a8d4c 100644 --- a/integration_tests/models/source.yml +++ b/integration_tests/models/source.yml @@ -2,6 +2,7 @@ version: 2 sources: - name: codegen_integration_tests__data_source_schema + schema: "{{ target.schema ~ '__data_source_schema' }}" tables: - name: codegen_integration_tests__data_source_table columns: diff --git a/integration_tests/tests/test_generate_model_import_ctes.sql b/integration_tests/tests/test_generate_model_import_ctes.sql index 8975550..51adc52 100644 --- a/integration_tests/tests/test_generate_model_import_ctes.sql +++ b/integration_tests/tests/test_generate_model_import_ctes.sql @@ -12,14 +12,7 @@ materialized='table', ) }}{% endraw %} -with codegen_integration_tests__data_source_schema_codegen_integration_tests__data_source_table as ( - - select * from codegen_integration_tests__data_source_schema.codegen_integration_tests__data_source_table - -- CAUTION: It's best practice to use the ref or source function instead of a direct reference - -), - -data__a_relation as ( +with data__a_relation as ( select * from {% raw %}{{ ref('data__a_relation') }}{% endraw %} @@ -108,7 +101,7 @@ my_first_cte as ( my_second_cte as ( select 1 as id - from codegen_integration_tests__data_source_schema_codegen_integration_tests__data_source_table + from {% raw %}{{ target.schema }}{% endraw %}__data_source_schema.codegen_integration_tests__data_source_table union all select 2 as id diff --git a/integration_tests/tests/test_generate_model_import_ctes_leading.sql b/integration_tests/tests/test_generate_model_import_ctes_leading.sql index fd4d3e2..38057f2 100644 --- a/integration_tests/tests/test_generate_model_import_ctes_leading.sql +++ 
b/integration_tests/tests/test_generate_model_import_ctes_leading.sql @@ -13,14 +13,7 @@ materialized='table', ) }}{% endraw %} -with codegen_integration_tests__data_source_schema_codegen_integration_tests__data_source_table as ( - - select * from codegen_integration_tests__data_source_schema.codegen_integration_tests__data_source_table - -- CAUTION: It's best practice to use the ref or source function instead of a direct reference - -) - -,data__a_relation as ( +with data__a_relation as ( select * from {% raw %}{{ ref('data__a_relation') }}{% endraw %} @@ -109,7 +102,7 @@ with codegen_integration_tests__data_source_schema_codegen_integration_tests__da my_second_cte as ( select 1 as id - from codegen_integration_tests__data_source_schema_codegen_integration_tests__data_source_table + from {% raw %}{{ target.schema }}{% endraw %}__data_source_schema.codegen_integration_tests__data_source_table union all select 2 as id From 972aa3fe374dbb90f827d04a2c116fb04a37df47 Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Fri, 22 Nov 2024 15:31:31 -0600 Subject: [PATCH 14/28] add support for redshift testing (#204) Co-authored-by: Doug Beatty --- .github/workflows/ci.yml | 7 +++++++ integration_tests/profiles.yml | 14 +++++++------- supported_adapters.env | 2 +- tox.ini | 22 ++++++++++++++++++++++ 4 files changed, 37 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c7497bc..e9c5724 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,6 +29,13 @@ jobs: SNOWFLAKE_DATABASE: ${{ vars.SNOWFLAKE_DATABASE }} SNOWFLAKE_WAREHOUSE: ${{ vars.SNOWFLAKE_WAREHOUSE }} SNOWFLAKE_SCHEMA: "integration_tests_snowflake_${{ github.run_number }}" + # redshift + REDSHIFT_HOST: ${{ vars.REDSHIFT_HOST }} + REDSHIFT_USER: ${{ vars.REDSHIFT_USER }} + REDSHIFT_DATABASE: ${{ vars.REDSHIFT_DATABASE }} + REDSHIFT_SCHEMA: "integration_tests_redshift_${{ github.run_number }}" + REDSHIFT_PORT: ${{ vars.REDSHIFT_PORT }} secrets: 
SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }} DBT_ENV_SECRET_SNOWFLAKE_PASS: ${{ secrets.SNOWFLAKE_PASS }} + DBT_ENV_SECRET_REDSHIFT_PASS: ${{ secrets.REDSHIFT_PASS }} diff --git a/integration_tests/profiles.yml b/integration_tests/profiles.yml index f2ddf18..d14af9c 100644 --- a/integration_tests/profiles.yml +++ b/integration_tests/profiles.yml @@ -17,13 +17,13 @@ integration_tests: threads: 5 redshift: - type: redshift - host: "{{ env_var('REDSHIFT_TEST_HOST') }}" - user: "{{ env_var('REDSHIFT_TEST_USER') }}" - pass: "{{ env_var('REDSHIFT_TEST_PASS') }}" - dbname: "{{ env_var('REDSHIFT_TEST_DBNAME') }}" - port: "{{ env_var('REDSHIFT_TEST_PORT') | as_number }}" - schema: codegen_integration_tests_redshift + type: "redshift" + host: "{{ env_var('REDSHIFT_HOST') }}" + user: "{{ env_var('REDSHIFT_USER') }}" + pass: "{{ env_var('DBT_ENV_SECRET_REDSHIFT_PASS') }}" + dbname: "{{ env_var('REDSHIFT_DATABASE') }}" + port: "{{ env_var('REDSHIFT_PORT') | as_number }}" + schema: "{{ env_var('REDSHIFT_SCHEMA') }}" threads: 1 bigquery: diff --git a/supported_adapters.env b/supported_adapters.env index 8d18434..7e73438 100644 --- a/supported_adapters.env +++ b/supported_adapters.env @@ -1 +1 @@ -SUPPORTED_ADAPTERS=postgres,snowflake +SUPPORTED_ADAPTERS=postgres,snowflake,redshift diff --git a/tox.ini b/tox.ini index d10286b..69c6b44 100644 --- a/tox.ini +++ b/tox.ini @@ -19,6 +19,13 @@ passenv = SNOWFLAKE_DATABASE SNOWFLAKE_WAREHOUSE SNOWFLAKE_SCHEMA + # redshift env vars + REDSHIFT_HOST + REDSHIFT_USER + DBT_ENV_SECRET_REDSHIFT_PASS + REDSHIFT_DATABASE + REDSHIFT_SCHEMA + REDSHIFT_PORT # Postgres integration tests for centralized dbt testing # run dbt commands directly, assumes dbt is already installed in environment @@ -49,3 +56,18 @@ commands = dbt --warn-error seed --target snowflake --full-refresh dbt --warn-error run --target snowflake dbt --warn-error test --target snowflake + +# redshift integration tests for centralized dbt testing +# run dbt commands directly, 
assumes dbt is already installed in environment +[testenv:dbt_integration_redshift] +changedir = integration_tests +allowlist_externals = + dbt +skip_install = true +commands = + dbt --version + dbt --warn-error deps --target redshift + dbt --warn-error run-operation create_source_table --target redshift + dbt --warn-error seed --target redshift --full-refresh + dbt --warn-error run --target redshift + dbt --warn-error test --target redshift From a88bc126a289eb4ae23f535ed8ea6809047c3328 Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Fri, 22 Nov 2024 16:15:12 -0600 Subject: [PATCH 15/28] Add support for bigquery testing in GitHub CI via tox (#203) * add support for bigquery testing * add missing var in tox file * Temporarily only run CI tests for BigQuery * Prefix the schema for the data source in CI with the name of the target schema * Store artifacts for logs and target directories for BigQuery * Set up environment variable for BigQuery credentials (keyfile for service account JSON) * Set the custom schema in the source definition * Use the target schema * Try to align actual vs. expected when the schema name is variable * Remove extraneous storage of artifacts * Temporarily disable two failing CI tests * Revert "Temporarily disable two failing CI tests" This reverts commit d70d776be66e3ee7085434a461b90a86f527645f. 
--------- Co-authored-by: Doug Beatty --- .github/workflows/ci.yml | 4 ++++ integration_tests/profiles.yml | 9 +++++---- supported_adapters.env | 2 +- tox.ini | 19 +++++++++++++++++++ 4 files changed, 29 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e9c5724..2bb6f6c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,6 +29,9 @@ jobs: SNOWFLAKE_DATABASE: ${{ vars.SNOWFLAKE_DATABASE }} SNOWFLAKE_WAREHOUSE: ${{ vars.SNOWFLAKE_WAREHOUSE }} SNOWFLAKE_SCHEMA: "integration_tests_snowflake_${{ github.run_number }}" + # bigquery + BIGQUERY_PROJECT: ${{ vars.BIGQUERY_PROJECT }} + BIGQUERY_SCHEMA: "integration_tests_bigquery_${{ github.run_number }}" # redshift REDSHIFT_HOST: ${{ vars.REDSHIFT_HOST }} REDSHIFT_USER: ${{ vars.REDSHIFT_USER }} @@ -39,3 +42,4 @@ jobs: SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }} DBT_ENV_SECRET_SNOWFLAKE_PASS: ${{ secrets.SNOWFLAKE_PASS }} DBT_ENV_SECRET_REDSHIFT_PASS: ${{ secrets.REDSHIFT_PASS }} + BIGQUERY_KEYFILE_JSON: ${{ secrets.BIGQUERY_KEYFILE_JSON }} diff --git a/integration_tests/profiles.yml b/integration_tests/profiles.yml index d14af9c..e696051 100644 --- a/integration_tests/profiles.yml +++ b/integration_tests/profiles.yml @@ -28,10 +28,11 @@ integration_tests: bigquery: type: bigquery - method: service-account - keyfile: "{{ env_var('BIGQUERY_SERVICE_KEY_PATH') }}" - project: "{{ env_var('BIGQUERY_TEST_DATABASE') }}" - schema: codegen_integration_tests_bigquery + method: service-account-json + keyfile_json: + "{{ env_var('BIGQUERY_KEYFILE_JSON') | as_native}}" + project: "{{ env_var('BIGQUERY_PROJECT') }}" + dataset: "{{ env_var('BIGQUERY_SCHEMA') }}" threads: 1 snowflake: diff --git a/supported_adapters.env b/supported_adapters.env index 7e73438..3acc8a8 100644 --- a/supported_adapters.env +++ b/supported_adapters.env @@ -1 +1 @@ -SUPPORTED_ADAPTERS=postgres,snowflake,redshift +SUPPORTED_ADAPTERS=postgres,snowflake,redshift,bigquery diff --git 
a/tox.ini b/tox.ini index 69c6b44..835c0cd 100644 --- a/tox.ini +++ b/tox.ini @@ -19,6 +19,10 @@ passenv = SNOWFLAKE_DATABASE SNOWFLAKE_WAREHOUSE SNOWFLAKE_SCHEMA + # bigquery env vars + BIGQUERY_PROJECT + BIGQUERY_SCHEMA + BIGQUERY_KEYFILE_JSON # redshift env vars REDSHIFT_HOST REDSHIFT_USER @@ -57,6 +61,21 @@ commands = dbt --warn-error run --target snowflake dbt --warn-error test --target snowflake +# bigquery integration tests for centralized dbt testing +# run dbt commands directly, assumes dbt is already installed in environment +[testenv:dbt_integration_bigquery] +changedir = integration_tests +allowlist_externals = + dbt +skip_install = true +commands = + dbt --version + dbt --warn-error deps --target bigquery + dbt --warn-error run-operation create_source_table --target bigquery + dbt --warn-error seed --target bigquery --full-refresh + dbt --warn-error run --target bigquery + dbt --warn-error test --target bigquery + # redshift integration tests for centralized dbt testing # run dbt commands directly, assumes dbt is already installed in environment [testenv:dbt_integration_redshift] From f5ec206be9d0ceb23fc9b48a2041539f2fac53fc Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Tue, 3 Dec 2024 10:51:12 -0700 Subject: [PATCH 16/28] Update changelog for 0.13.0 release (#227) --- CHANGELOG.md | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 06cfa96..6899150 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,49 @@ +# dbt-codegen v0.13.0 + +## What's Changed + +### Features + +* Read upstream descriptions from sources by @esegal in https://github.com/dbt-labs/dbt-codegen/pull/154 +* Parameters in `generate_source` for case-sensitive identifiers by @pnadolny13 in https://github.com/dbt-labs/dbt-codegen/pull/168 + +### Fixes + +* Escape upstream descriptions in generate_model_yaml by @wircho in 
https://github.com/dbt-labs/dbt-codegen/pull/159 +* Fix quoted identifiers in the `generate_base_model` macro for BigQuery by @dbeatty10 in https://github.com/dbt-labs/dbt-codegen/pull/199 + +### Docs + +* fix generate_source example by @yatsky in https://github.com/dbt-labs/dbt-codegen/pull/164 +* Improve developer README by @gwenwindflower in https://github.com/dbt-labs/dbt-codegen/pull/163 +* Fix bad spacing in dev README by @gwenwindflower in https://github.com/dbt-labs/dbt-codegen/pull/170 +* Changelogs for 0.12.0, 0.12.1, and 0.13.0-b1 by @dbeatty10 in https://github.com/dbt-labs/dbt-codegen/pull/196 + +## Under the hood + +* Restore CI test for case-sensitive identifiers when generating sources by @dbeatty10 in https://github.com/dbt-labs/dbt-codegen/pull/192 +* Remove Redshift-specific logic for toggling case-sensitive identifiers by @dbeatty10 in https://github.com/dbt-labs/dbt-codegen/pull/208 +* Use the `cimg/postgres` Docker image by @dbeatty10 in https://github.com/dbt-labs/dbt-codegen/pull/214 +* Independent CircleCI workflow job for each tested adapter by @dbeatty10 in https://github.com/dbt-labs/dbt-codegen/pull/215 +* Simplify environment variables for BigQuery in CircleCI by @dbeatty10 in https://github.com/dbt-labs/dbt-codegen/pull/216 +* Stop installing prereleases from PyPI in favor of stable releases only by @dbeatty10 in https://github.com/dbt-labs/dbt-codegen/pull/220 +* Upgrade to Python 3.11 in CircleCI by @dbeatty10 in https://github.com/dbt-labs/dbt-codegen/pull/222 +* Use dynamic schema names rather than hardcoded ones by @dbeatty10 in https://github.com/dbt-labs/dbt-codegen/pull/224 +* Add support for postgres testing in GitHub CI via tox by @emmyoop in https://github.com/dbt-labs/dbt-codegen/pull/181 +* Add support for snowflake testing in GitHub CI via tox by @emmyoop in https://github.com/dbt-labs/dbt-codegen/pull/198 +* Add support for redshift testing in GitHub CI via tox by @emmyoop in
https://github.com/dbt-labs/dbt-codegen/pull/204 +* Add support for bigquery testing in GitHub CI via tox by @emmyoop in https://github.com/dbt-labs/dbt-codegen/pull/203 + +## New Contributors +* @wircho made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/159 +* @esegal made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/154 +* @yatsky made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/164 +* @gwenwindflower made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/163 +* @pnadolny13 made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/168 +* @emmyoop made their first contribution in https://github.com/dbt-labs/dbt-codegen/pull/181 + +**Full Changelog**: https://github.com/dbt-labs/dbt-codegen/compare/0.12.1...0.13.0 + # dbt-codegen v0.13.0-b1 ## What's Changed From a79f28fbd3ecd9773e477c48084f1cef87a34217 Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Wed, 4 Dec 2024 11:31:59 -0700 Subject: [PATCH 17/28] Revert "Restore CI test for case-sensitive identifiers when generating sources (#192)" (#230) This reverts commit 7acc8c0ebc80c3d220b9fbbc61fd5ed816cbdb8b. 
--- integration_tests/dbt_project.yml | 4 --- .../seeds/data__Case_Sensitive.csv | 3 -- .../test_generate_source_case_sensitive.sql | 35 ------------------- 3 files changed, 42 deletions(-) delete mode 100644 integration_tests/seeds/data__Case_Sensitive.csv delete mode 100644 integration_tests/tests/test_generate_source_case_sensitive.sql diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml index 5e82ee7..7d0c8f2 100644 --- a/integration_tests/dbt_project.yml +++ b/integration_tests/dbt_project.yml @@ -22,10 +22,6 @@ flags: seeds: +schema: raw_data +quote_columns: false - codegen_integration_tests: - data__Case_Sensitive: - +schema: Raw_Data_Case_Sensitive - quote_columns: true vars: my_table_reference: table_c diff --git a/integration_tests/seeds/data__Case_Sensitive.csv b/integration_tests/seeds/data__Case_Sensitive.csv deleted file mode 100644 index d0f2e97..0000000 --- a/integration_tests/seeds/data__Case_Sensitive.csv +++ /dev/null @@ -1,3 +0,0 @@ -Col_A,Col_B -1,a -2,b diff --git a/integration_tests/tests/test_generate_source_case_sensitive.sql b/integration_tests/tests/test_generate_source_case_sensitive.sql deleted file mode 100644 index eac7169..0000000 --- a/integration_tests/tests/test_generate_source_case_sensitive.sql +++ /dev/null @@ -1,35 +0,0 @@ - -{% set raw_schema = generate_schema_name('Raw_Data_Case_Sensitive') %} - --- test default args -{% set actual_source_yaml = codegen.generate_source( - schema_name=raw_schema, - database_name=target.database, - generate_columns=True, - name=raw_schema, - include_database=True, - include_schema=True, - case_sensitive_databases=True, - case_sensitive_schemas=True, - case_sensitive_tables=True, - case_sensitive_cols=True -) %} - -{% set expected_source_yaml %} -version: 2 - -sources: - - name: {{ raw_schema | lower }} - database: {{ target.database }} - schema: {{ raw_schema }} - tables: - - name: {% if target.type == "snowflake" %}DATA__CASE_SENSITIVE{% else 
%}data__Case_Sensitive{% endif %} - columns: - - name: Col_A - data_type: {{ integer_type_value() }} - - name: Col_B - data_type: {{ text_type_value() }} -{% endset %} - - -{{ assert_equal (actual_source_yaml | trim, expected_source_yaml | trim) }} From 314908f79909898d79f39d5b3551d6ead05ddbb7 Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Wed, 4 Dec 2024 11:42:35 -0700 Subject: [PATCH 18/28] Update changelog for 0.13.1 release (#232) --- CHANGELOG.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6899150..30e34a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,13 @@ +# dbt-codegen v0.13.1 + +## What's Changed + +## Under the hood + +* Temporarily remove CI test for case-sensitive identifiers when generating sources by @dbeatty10 in https://github.com/dbt-labs/dbt-codegen/pull/230 + +**Full Changelog**: https://github.com/dbt-labs/dbt-codegen/compare/0.13.0...0.13.1 + # dbt-codegen v0.13.0 ## What's Changed From a3b6979ba8162738447cb658fa88e28f7fd6b1c0 Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Thu, 5 Dec 2024 13:27:22 -0700 Subject: [PATCH 19/28] Upgrade from Postgres 9 to 17 (#234) * Upgrade from Postgres 9 to 17 * The postgres image in CircleCI needs a major and minor version specified --- .circleci/config.yml | 2 +- docker-compose.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 348ba3f..4d7dbd8 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -5,7 +5,7 @@ jobs: integration-postgres: docker: - image: cimg/python:3.11 - - image: cimg/postgres:9.6 + - image: cimg/postgres:17.0 environment: POSTGRES_USER: root environment: diff --git a/docker-compose.yml b/docker-compose.yml index 6957ffe..88f7559 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,7 +1,7 @@ version: "3.7" services: postgres: - image: 
cimg/postgres:9.6 + image: cimg/postgres:17.0 environment: - POSTGRES_USER=root ports: From 2f20e941c2313d27e1d150c6e66784b182393913 Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Fri, 6 Dec 2024 11:42:52 -0600 Subject: [PATCH 20/28] Update ci.yml (#235) --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2bb6f6c..ddf5d53 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,7 +13,7 @@ on: push: branches: - main - pull_request: + pull_request_target: workflow_dispatch: jobs: From 5a31c62e1f3adbbe158c2a6c78e034cdda0bfa54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magn=C3=BAs=20=C3=9E=C3=B3r=20Benediktsson?= Date: Fri, 13 Dec 2024 19:43:47 +0100 Subject: [PATCH 21/28] Bigquery repeated data type (#236) * Handle BigQuery repeated fields data_types * include nested repeated structs * override repeated struct data_type with array * Add trailing newline * update changelog * Update CHANGELOG.md --------- Co-authored-by: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> --- integration_tests/models/model_repeated.sql | 13 +++++ .../test_generate_model_repeated_yaml.sql | 56 +++++++++++++++++++ .../tests/test_helper_get_models.sql | 2 +- macros/vendored/dbt_core/format_column.sql | 16 ++++++ 4 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 integration_tests/models/model_repeated.sql create mode 100644 integration_tests/tests/test_generate_model_repeated_yaml.sql diff --git a/integration_tests/models/model_repeated.sql b/integration_tests/models/model_repeated.sql new file mode 100644 index 0000000..2c08a2f --- /dev/null +++ b/integration_tests/models/model_repeated.sql @@ -0,0 +1,13 @@ +{% if target.type == "bigquery" %} + + {#--- This exists to test the BigQuery-specific behavior requested in #190 -#} +select + [1, 2] AS repeated_int, + [ + STRUCT(1 as nested_int_field, [STRUCT("a" as string_field)] as
nested_repeated_struct), + STRUCT(2 AS nested_int_field, [STRUCT("a" as string_field)] as nested_repeated_struct) + ] as repeated_struct + +{% else %} + select 1 as int_field +{% endif %} diff --git a/integration_tests/tests/test_generate_model_repeated_yaml.sql b/integration_tests/tests/test_generate_model_repeated_yaml.sql new file mode 100644 index 0000000..401cbe7 --- /dev/null +++ b/integration_tests/tests/test_generate_model_repeated_yaml.sql @@ -0,0 +1,56 @@ +{% set raw_schema = generate_schema_name('raw_data') %} + +{% set actual_source_yaml = codegen.generate_model_yaml( + model_names=['model_repeated'] + ) +%} + +{% if target.type == "bigquery" %} + +{% set expected_source_yaml %} +version: 2 + +models: + - name: model_repeated + description: "" + columns: + - name: repeated_int + data_type: array + description: "" + + - name: repeated_struct + data_type: array + description: "" + + - name: repeated_struct.nested_int_field + data_type: int64 + description: "" + + - name: repeated_struct.nested_repeated_struct + data_type: array + description: "" + + - name: repeated_struct.nested_repeated_struct.string_field + data_type: string + description: "" + +{% endset %} + +{% else %} + +{% set expected_source_yaml %} +version: 2 + +models: + - name: model_repeated + description: "" + columns: + - name: int_field + data_type: {{ integer_type_value() }} + description: "" + +{% endset %} + +{% endif %} + +{{ assert_equal (actual_source_yaml | trim, expected_source_yaml | trim) }} diff --git a/integration_tests/tests/test_helper_get_models.sql b/integration_tests/tests/test_helper_get_models.sql index d9393a3..95c4d0c 100644 --- a/integration_tests/tests/test_helper_get_models.sql +++ b/integration_tests/tests/test_helper_get_models.sql @@ -7,6 +7,6 @@ {% set actual_list = codegen.get_models(prefix='model_')|sort %} {% endif %} -{% set expected_list = ['model_data_a', 'model_from_source', 'model_struct', 'model_without_any_ctes', 'model_without_import_ctes'] %} +{% 
set expected_list = ['model_data_a', 'model_from_source', 'model_repeated', 'model_struct', 'model_without_any_ctes', 'model_without_import_ctes'] %} {{ assert_equal (actual_list, expected_list) }} diff --git a/macros/vendored/dbt_core/format_column.sql b/macros/vendored/dbt_core/format_column.sql index a7a6669..2365638 100644 --- a/macros/vendored/dbt_core/format_column.sql +++ b/macros/vendored/dbt_core/format_column.sql @@ -1,5 +1,21 @@ {% macro format_column(column) -%} + {{ return(adapter.dispatch('format_column', 'codegen')(column)) }} +{%- endmacro %} + +{# Vendored from: https://github.com/dbt-labs/dbt-adapters/blob/c7b12aee533184bad391a657d1753539d1dd496a/dbt/include/global_project/macros/relations/column/columns_spec_ddl.sql#L85-L89 #} +{% macro default__format_column(column) -%} {% set data_type = column.dtype %} {% set formatted = column.column.lower() ~ " " ~ data_type %} {{ return({'name': column.name, 'data_type': data_type, 'formatted': formatted}) }} {%- endmacro -%} + +{# Vendored from: https://github.com/dbt-labs/dbt-bigquery/blob/4d255b2f854d21d5d8871bdaa8d7ab47e7e863a3/dbt/include/bigquery/macros/utils/get_columns_spec_ddl.sql#L1-L5 #} +{# But modified to handle https://github.com/dbt-labs/dbt-codegen/issues/190 #} +{% macro bigquery__format_column(column) -%} + {% set data_type = column.data_type %} + {% if column.mode.lower() == "repeated" and column.dtype.lower() == "record" %} + {% set data_type = "array" %} + {% endif %} + {% set formatted = column.column.lower() ~ " " ~ data_type %} + {{ return({'name': column.name, 'data_type': data_type, 'formatted': formatted}) }} +{%- endmacro -%} From 6137ca69282e3bffe23f2f479fa5d57b2031be13 Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Fri, 13 Dec 2024 11:51:58 -0700 Subject: [PATCH 22/28] Remove "I have added an entry to CHANGELOG.md" from the PR template (#239) --- .github/pull_request_template.md | 1 - 1 file changed, 1 deletion(-) diff --git 
a/.github/pull_request_template.md b/.github/pull_request_template.md index 393ad9f..1d6e973 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -18,4 +18,3 @@ Describe your changes, and why you're making them. - [ ] I have verified that these changes work locally - [ ] I have updated the README.md (if applicable) - [ ] I have added tests & descriptions to my models (and macros if applicable) -- [ ] I have added an entry to CHANGELOG.md From 3169bda0f91128129ade1fb966573d6b94fde5d7 Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Fri, 13 Dec 2024 13:28:53 -0700 Subject: [PATCH 23/28] Contributors shouldn't edit the `CHANGELOG.md` directly anymore (#240) --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 73c3613..0ea2496 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -67,7 +67,7 @@ See here for details for running existing integration tests and adding new ones: ## Adding CHANGELOG Entry -Unlike `dbt-core`, we edit the `CHANGELOG.md` directly. +We use [automatically generated release notes](https://docs.github.com/en/repositories/releasing-projects-on-github/automatically-generated-release-notes) to generate `CHANGELOG` entries. **Note:** Do not edit the `CHANGELOG.md` directly. Your modifications will be lost. You don't need to worry about which `dbt-codegen` version your change will go into. Just create the changelog entry at the top of CHANGELOG.md and open your PR against the `main` branch. All merged changes will be included in the next minor version of `dbt-codegen`. The maintainers _may_ choose to "backport" specific changes in order to patch older minor versions. In that case, a maintainer will take care of that backport after merging your PR, before releasing the new version of `dbt-codegen`. 
From af4e2c0e45226a9a0d560426b195486cdd946619 Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Fri, 13 Dec 2024 13:42:54 -0700 Subject: [PATCH 24/28] Remove the PR checklist items related to the type of change (#243) --- .github/pull_request_template.md | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 1d6e973..a95de33 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,13 +1,5 @@ resolves # -This is a: -- [ ] documentation update -- [ ] bug fix with no breaking changes -- [ ] new functionality -- [ ] a breaking change - -All pull requests from community contributors should target the `main` branch (default). - ## Description & motivation + +### Solution + ## Checklist -- [ ] This code is associated with an issue which has been triaged and [accepted for development](https://docs.getdbt.com/docs/contributing/oss-expectations#pull-requests). +- [ ] This code is associated with an [issue](https://github.com/dbt-labs/dbt-codegen/issues) which has been triaged and [accepted for development](https://docs.getdbt.com/docs/contributing/oss-expectations#pull-requests). 
- [ ] I have verified that these changes work locally - [ ] I have updated the README.md (if applicable) - [ ] I have added tests & descriptions to my models (and macros if applicable) From 4f04af07d3e598e00b02c838d3f82cf32a5b11e3 Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Fri, 13 Dec 2024 14:02:38 -0700 Subject: [PATCH 26/28] Align the pull request template with `dbt-utils` (#246) --- .github/pull_request_template.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 5703c91..c49eb6a 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -17,6 +17,7 @@ resolves # ## Checklist - [ ] This code is associated with an [issue](https://github.com/dbt-labs/dbt-codegen/issues) which has been triaged and [accepted for development](https://docs.getdbt.com/docs/contributing/oss-expectations#pull-requests). -- [ ] I have verified that these changes work locally +- [ ] I have read [the contributing guide](https://github.com/dbt-labs/dbt-codegen/blob/main/CONTRIBUTING.md) and understand what's expected of me +- [ ] I have run this code in development and it appears to resolve the stated issue +- [ ] This PR includes tests, or tests are not required/relevant for this PR - [ ] I have updated the README.md (if applicable) -- [ ] I have added tests & descriptions to my models (and macros if applicable) From 95bb09354cae3b1cfec88b74de099f28940fc5b6 Mon Sep 17 00:00:00 2001 From: security-dbtlabs <136609081+security-dbtlabs@users.noreply.github.com> Date: Thu, 27 Feb 2025 22:00:18 -0500 Subject: [PATCH 27/28] Update CODEOWNERS file (#248) * Update CODEOWNERS file with global codeowner * Update .github/CODEOWNERS --------- Co-authored-by: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS 
b/.github/CODEOWNERS index 0f94434..6d5d911 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1 @@ -* @clrcrl +* @dbt-labs/dbt-package-owners From f6666ca441af7cbc01c02eb0e2b32043a9f412a7 Mon Sep 17 00:00:00 2001 From: "Bruno S. de Lima" Date: Wed, 2 Apr 2025 21:34:04 -0300 Subject: [PATCH 28/28] adding `generate_unit_test_template` macro (#251) * adding generate_unit_test_template_macro * added a relation exists check * adding arg to controle inline/multiline columns * Removing duplicated `get_resource_from_unique_id` macro * Add newline to end of file * Add a simple incremental model for testing * CI tests * Use dispatch, etc. * Usage documentation for `generate_unit_test_template` macro --------- Co-authored-by: Bruno Souza de Lima Co-authored-by: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> --- README.md | 42 +++++ .../models/model_incremental.sql | 5 + .../test_generate_unit_test_template.sql | 28 ++++ ...enerate_unit_test_template_incremental.sql | 28 ++++ ...rate_unit_test_template_inline_columns.sql | 26 +++ ...e_unit_test_template_model_from_source.sql | 26 +++ ..._generate_unit_test_template_no_inputs.sql | 23 +++ .../tests/test_helper_get_models.sql | 3 +- macros/generate_unit_test_template.sql | 151 ++++++++++++++++++ macros/helpers/helpers.sql | 17 +- 10 files changed, 347 insertions(+), 2 deletions(-) create mode 100644 integration_tests/models/model_incremental.sql create mode 100644 integration_tests/tests/test_generate_unit_test_template.sql create mode 100644 integration_tests/tests/test_generate_unit_test_template_incremental.sql create mode 100644 integration_tests/tests/test_generate_unit_test_template_inline_columns.sql create mode 100644 integration_tests/tests/test_generate_unit_test_template_model_from_source.sql create mode 100644 integration_tests/tests/test_generate_unit_test_template_no_inputs.sql create mode 100644 macros/generate_unit_test_template.sql diff --git a/README.md b/README.md index 
ae1d8e0..438d248 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,9 @@ Macros that generate dbt code, and log it to the command line. - [generate_model_import_ctes (source)](#generate_model_import_ctes-source) - [Arguments:](#arguments-5) - [Usage:](#usage-5) + - [generate_unit_test_template (source)](#generate_unit_test_template-source) + - [Arguments:](#arguments-6) + - [Usage:](#usage-6) - [Contributing](#contributing) # Installation instructions @@ -394,6 +397,45 @@ select * from final 4. Replace the contents of the model's current SQL file with the compiled or logged code +## generate_unit_test_template ([source](macros/generate_unit_test_template.sql)) + +This macro generates the unit testing YAML for a given model with all references included as `given` inputs (along with their columns), plus the columns within the expected output. + +### Arguments: + +- `model_name` (required): The model you wish to generate unit testing YAML for. +- `inline_columns` (optional, default=False): Whether you want all columns on the same line. + +### Usage: + +1. Create a model with your original SQL query +2. Call the macro as an [operation](https://docs.getdbt.com/docs/using-operations): + +``` +$ dbt run-operation generate_unit_test_template --args '{"model_name": "order_items", "inline_columns": false}' +``` + +3. The new YAML - with all given inputs included - will be logged to the command line + +```yaml +unit_tests: + - name: unit_test_order_items + model: order_items + + given: + - input: ref("stg_order_items") + rows: + - col_a: + col_b: + + expect: + rows: + - id: +``` + +4. Create a new YAML file with the compiled or logged code. +5. Add column values for the given inputs and expected output. + ## Contributing To contirbute code to this package, please follow the steps outlined in the `integration_tests` directory's [README](https://github.com/dbt-labs/dbt-codegen/blob/main/integration_tests/README.md) file.
diff --git a/integration_tests/models/model_incremental.sql b/integration_tests/models/model_incremental.sql new file mode 100644 index 0000000..407e284 --- /dev/null +++ b/integration_tests/models/model_incremental.sql @@ -0,0 +1,5 @@ +{{ config( + materialized='incremental' +) }} + +select 1 as id diff --git a/integration_tests/tests/test_generate_unit_test_template.sql b/integration_tests/tests/test_generate_unit_test_template.sql new file mode 100644 index 0000000..8a482b7 --- /dev/null +++ b/integration_tests/tests/test_generate_unit_test_template.sql @@ -0,0 +1,28 @@ +{% set actual_model_yaml = codegen.generate_unit_test_template( + model_name='child_model', + inline_columns=False + ) +%} + +-- depends_on: {{ ref('model_data_a') }} +-- depends_on: {{ ref('child_model') }} + +{% set expected_model_yaml %} +unit_tests: + - name: unit_test_child_model + model: child_model + + given: + - input: ref("model_data_a") + rows: + - col_a: + col_b: + + expect: + rows: + - col_a: + col_b: + +{% endset %} + +{{ assert_equal (actual_model_yaml | trim, expected_model_yaml | trim) }} diff --git a/integration_tests/tests/test_generate_unit_test_template_incremental.sql b/integration_tests/tests/test_generate_unit_test_template_incremental.sql new file mode 100644 index 0000000..433cbfb --- /dev/null +++ b/integration_tests/tests/test_generate_unit_test_template_incremental.sql @@ -0,0 +1,28 @@ +{% set actual_model_yaml = codegen.generate_unit_test_template( + model_name='model_incremental', + ) +%} + +-- depends_on: {{ ref('model_incremental') }} + +{% set expected_model_yaml %} +unit_tests: + - name: unit_test_model_incremental + model: model_incremental + + overrides: + macros: + is_incremental: true + + given: + - input: this + rows: + - id: + + expect: + rows: + - id: + +{% endset %} + +{{ assert_equal (actual_model_yaml | trim, expected_model_yaml | trim) }} diff --git a/integration_tests/tests/test_generate_unit_test_template_inline_columns.sql 
b/integration_tests/tests/test_generate_unit_test_template_inline_columns.sql new file mode 100644 index 0000000..b45cc74 --- /dev/null +++ b/integration_tests/tests/test_generate_unit_test_template_inline_columns.sql @@ -0,0 +1,26 @@ +{% set actual_model_yaml = codegen.generate_unit_test_template( + model_name='child_model', + inline_columns=True + ) +%} + +-- depends_on: {{ ref('model_data_a') }} +-- depends_on: {{ ref('child_model') }} + +{% set expected_model_yaml %} +unit_tests: + - name: unit_test_child_model + model: child_model + + given: + - input: ref("model_data_a") + rows: + - {col_a: , col_b: } + + expect: + rows: + - {col_a: , col_b: } + +{% endset %} + +{{ assert_equal (actual_model_yaml | trim, expected_model_yaml | trim) }} diff --git a/integration_tests/tests/test_generate_unit_test_template_model_from_source.sql b/integration_tests/tests/test_generate_unit_test_template_model_from_source.sql new file mode 100644 index 0000000..f89f196 --- /dev/null +++ b/integration_tests/tests/test_generate_unit_test_template_model_from_source.sql @@ -0,0 +1,26 @@ +{% set actual_model_yaml = codegen.generate_unit_test_template( + model_name='model_from_source', + ) +%} + +-- depends_on: {{ ref('model_from_source') }} + +{% set expected_model_yaml %} +unit_tests: + - name: unit_test_model_from_source + model: model_from_source + + given: + - input: source("codegen_integration_tests__data_source_schema", "codegen_integration_tests__data_source_table") + rows: + - my_integer_col: + my_bool_col: + + expect: + rows: + - my_integer_col: + my_bool_col: + +{% endset %} + +{{ assert_equal (actual_model_yaml | trim, expected_model_yaml | trim) }} diff --git a/integration_tests/tests/test_generate_unit_test_template_no_inputs.sql b/integration_tests/tests/test_generate_unit_test_template_no_inputs.sql new file mode 100644 index 0000000..e915655 --- /dev/null +++ b/integration_tests/tests/test_generate_unit_test_template_no_inputs.sql @@ -0,0 +1,23 @@ +{% set 
actual_model_yaml = codegen.generate_unit_test_template( + model_name='data__a_relation', + inline_columns=False + ) +%} + +-- depends_on: {{ ref('data__a_relation') }} + +{% set expected_model_yaml %} +unit_tests: + - name: unit_test_data__a_relation + model: data__a_relation + + given: [] + + expect: + rows: + - col_a: + col_b: + +{% endset %} + +{{ assert_equal (actual_model_yaml | trim, expected_model_yaml | trim) }} diff --git a/integration_tests/tests/test_helper_get_models.sql b/integration_tests/tests/test_helper_get_models.sql index 95c4d0c..1c0a32a 100644 --- a/integration_tests/tests/test_helper_get_models.sql +++ b/integration_tests/tests/test_helper_get_models.sql @@ -1,4 +1,5 @@ -- depends_on: {{ ref('model_data_a') }} +-- depends_on: {{ ref('model_incremental') }} -- depends_on: {{ ref('model_struct') }} -- depends_on: {{ ref('model_without_import_ctes') }} -- depends_on: {{ ref('model_without_any_ctes') }} @@ -7,6 +8,6 @@ {% set actual_list = codegen.get_models(prefix='model_')|sort %} {% endif %} -{% set expected_list = ['model_data_a', 'model_from_source', 'model_repeated', 'model_struct', 'model_without_any_ctes', 'model_without_import_ctes'] %} +{% set expected_list = ['model_data_a', 'model_from_source', 'model_incremental', 'model_repeated', 'model_struct', 'model_without_any_ctes', 'model_without_import_ctes'] %} {{ assert_equal (actual_list, expected_list) }} diff --git a/macros/generate_unit_test_template.sql b/macros/generate_unit_test_template.sql new file mode 100644 index 0000000..36afba2 --- /dev/null +++ b/macros/generate_unit_test_template.sql @@ -0,0 +1,151 @@ +{# Public entry point: forwards model_name and inline_columns to the implementation selected by adapter.dispatch in the codegen namespace. #} {% macro generate_unit_test_template(model_name, inline_columns=false) %} + {{ return(adapter.dispatch('generate_unit_test_template', 'codegen')(model_name, inline_columns)) }} +{% endmacro %} + +{# Default implementation: builds a unit test YAML skeleton for model_name. Inputs and materialization come from the matching model node in graph.nodes; column names come from adapter.get_columns_in_relation. With inline_columns true each row is one "- {col: , ...}" line, otherwise one "col: " entry per line. Incremental models also get an is_incremental override and an "input: this" entry. The YAML is printed and returned only when execute is true. #} {% macro default__generate_unit_test_template(model_name, inline_columns=false) %} + + {%- set ns = namespace(depends_on_list = []) -%} + + {%- if execute -%} + + -- getting inputs
and materialization info + {%- for node in graph.nodes.values() + | selectattr("resource_type", "equalto", "model") + | selectattr("name", "equalto", model_name) -%} + {%- set ns.depends_on_list = ns.depends_on_list + node.depends_on.nodes -%} + {%- set ns.this_materialization = node.config['materialized'] -%} + {%- endfor -%} + + {%- endif -%} + + -- getting the input columns + {%- set ns.input_columns_list = [] -%} + {%- for item in ns.depends_on_list -%} + {%- set input_columns_list = [] -%} + {%- set item_dict = codegen.get_resource_from_unique_id(item) -%} + {%- if item_dict.resource_type == 'source' %} + {%- set columns = adapter.get_columns_in_relation(source(item_dict.source_name, item_dict.identifier)) -%} + {%- else -%} + {%- set columns = adapter.get_columns_in_relation(ref(item_dict.alias)) -%} + {%- endif -%} + {%- for column in columns -%} + {{ input_columns_list.append(column.name) }} + {%- endfor -%} + {{ ns.input_columns_list.append(input_columns_list) }} + {%- endfor -%} + + -- getting 'this' columns + {% set relation_exists = load_relation(ref(model_name)) is not none %} {# false when load_relation returns none for the model; the "this" and expect rows are then emitted without any columns #} + {% if relation_exists %} + {%- set ns.expected_columns_list = [] -%} + {%- set columns = adapter.get_columns_in_relation(ref(model_name)) -%} + {%- for column in columns -%} + {{ ns.expected_columns_list.append(column.name) }} + {%- endfor -%} + {% endif %} + + {%- set unit_test_yaml_template -%} +unit_tests: + - name: unit_test_{{ model_name }} + model: {{ model_name }} +{% if ns.this_materialization == 'incremental' %} + overrides: + macros: + is_incremental: true +{% else -%} + +{%- endif %} + given: {%- if ns.depends_on_list|length == 0 and ns.this_materialization != 'incremental' %} []{% endif %} + {%- for i in range(ns.depends_on_list|length) -%} + {%- set item_dict = codegen.get_resource_from_unique_id(ns.depends_on_list[i]) -%} + {% if item_dict.resource_type == 'source' %} + - input: source("{{item_dict.source_name}}", "{{item_dict.identifier}}") + rows: + {%- else %}
+ - input: ref("{{item_dict.alias}}") + rows: + {%- endif -%} + {%- if inline_columns -%} + {%- set ns.column_string = '- {' -%} + {%- for column_name in ns.input_columns_list[i] -%} + {%- if not loop.last -%} + {%- set ns.column_string = ns.column_string ~ column_name ~ ': , ' -%} + {%- else -%} + {%- set ns.column_string = ns.column_string ~ column_name ~ ': }' -%} + {%- endif -%} + {% endfor %} + {%- else -%} + {%- set ns.column_string = '' -%} + {%- for column_name in ns.input_columns_list[i] -%} + {%- if loop.first -%} + {%- set ns.column_string = ns.column_string ~ '- ' ~ column_name ~ ': ' -%} + {%- else -%} + {%- set ns.column_string = ns.column_string ~ '\n ' ~ column_name ~ ': ' -%} + {%- endif -%} + {% endfor %} + {%- endif %} + {{ns.column_string}} + {%- endfor %} + + {%- if ns.this_materialization == 'incremental' %} + - input: this + rows: + {%- if relation_exists -%} + {%- if inline_columns -%} + {%- set ns.column_string = '- {' -%} + {%- for column_name in ns.expected_columns_list -%} + {%- if not loop.last -%} + {%- set ns.column_string = ns.column_string ~ column_name ~ ': , ' -%} + {%- else -%} + {%- set ns.column_string = ns.column_string ~ column_name ~ ': }' -%} + {%- endif -%} + {% endfor %} + {%- else -%} + {%- set ns.column_string = '' -%} + {%- for column_name in ns.expected_columns_list -%} + {%- if loop.first -%} + {%- set ns.column_string = ns.column_string ~ '- ' ~ column_name ~ ': ' -%} + {%- else -%} + {%- set ns.column_string = ns.column_string ~ '\n ' ~ column_name ~ ': ' -%} + {%- endif -%} + {% endfor %} + {%- endif %} + {{ns.column_string}} + {%- endif %} + {%- endif %} + + expect: + rows: + {%- if relation_exists -%} + {%- if inline_columns -%} + {%- set ns.column_string = '- {' -%} + {%- for column_name in ns.expected_columns_list -%} + {%- if not loop.last -%} + {%- set ns.column_string = ns.column_string ~ column_name ~ ': , ' -%} + {%- else -%} + {%- set ns.column_string = ns.column_string ~ column_name ~ ': }' -%} + {%-
endif -%} + {% endfor %} + {%- else -%} + {%- set ns.column_string = '' -%} + {%- for column_name in ns.expected_columns_list -%} + {%- if loop.first -%} + {%- set ns.column_string = ns.column_string ~ '- ' ~ column_name ~ ': ' -%} + {%- else -%} + {%- set ns.column_string = ns.column_string ~ '\n ' ~ column_name ~ ': ' -%} + {%- endif -%} + {% endfor %} + {%- endif %} + {{ns.column_string}} + {%- endif -%} + + {%- endset -%} + + {% if execute %} + + {{ print(unit_test_yaml_template) }} + {% do return(unit_test_yaml_template) %} + + {% endif %} + +{% endmacro %} diff --git a/macros/helpers/helpers.sql b/macros/helpers/helpers.sql index 086e57e..d7fa19e 100644 --- a/macros/helpers/helpers.sql +++ b/macros/helpers/helpers.sql @@ -48,7 +48,7 @@ {% set model_path = "/".join(model.path.split("/")[:-1]) %} {% if model_path == directory and model.name.startswith(prefix) %} {% do model_names.append(model.name) %} - {% endif %} + {% endif %} {% endfor %} {% elif directory %} {% for model in models %} @@ -88,3 +88,18 @@ {% set formatted = codegen.format_column(column) %} {{ return(formatted['data_type'] | lower) }} {% endmacro %} + +{# retrieve entire resource dictionary based on unique id: the text before the first '.' of resource_unique_id selects the graph collection (source -> graph.sources, exposure -> graph.exposures, metric -> graph.metrics, anything else -> graph.nodes) and the full unique id is the key within it #} +{% macro get_resource_from_unique_id(resource_unique_id) %} + {% set resource_type = resource_unique_id.split('.')[0] %} + {% if resource_type == 'source' %} + {% set resource = graph.sources[resource_unique_id] %} + {% elif resource_type == 'exposure' %} + {% set resource = graph.exposures[resource_unique_id] %} + {% elif resource_type == 'metric' %} + {% set resource = graph.metrics[resource_unique_id] %} + {% else %} + {% set resource = graph.nodes[resource_unique_id] %} + {% endif %} + {{ return(resource) }} +{% endmacro %}