Skip to content

Commit 05fcaa2

Browse files
committed
Add management schema feature
Add noop and warn tests; improve tests; rename tests; add view dropping test; add unmanaged schema test; make tests more DRY; delete tmp.csv; make schema management optional; add --target-path as a CLI option (#5402); include py.typed in MANIFEST.in (#5703), which enables packages that install dbt-core from PyPI to use mypy; WIP: move manage logic to a separate command; add manage command.
1 parent 5339882 commit 05fcaa2

9 files changed

Lines changed: 415 additions & 5 deletions

File tree

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
kind: Features
2+
body: Added a `manage` CLI command that allows users to drop unused database relations
3+
time: 2022-09-20T12:25:29.226182+02:00
4+
custom:
5+
Author: agoblet bneijt
6+
Issue: "4957"
7+
PR: "5392"

core/dbt/config/project.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
from dbt.contracts.project import (
3838
Project as ProjectContract,
3939
SemverString,
40+
SchemaManagementConfiguration,
4041
)
4142
from dbt.contracts.project import PackageConfig, ProjectPackageMetadata
4243
from dbt.contracts.publication import ProjectDependencies
@@ -429,6 +430,7 @@ def create_project(self, rendered: RenderComponents) -> "Project":
429430
model_paths, seed_paths, snapshot_paths, analysis_paths, macro_paths
430431
)
431432

433+
managed_schemas: List[SchemaManagementConfiguration] = value_or(cfg.managed_schemas, [])
432434
docs_paths: List[str] = value_or(cfg.docs_paths, all_source_paths)
433435
asset_paths: List[str] = value_or(cfg.asset_paths, [])
434436
flags = get_flags()
@@ -503,6 +505,7 @@ def create_project(self, rendered: RenderComponents) -> "Project":
503505
asset_paths=asset_paths,
504506
target_path=target_path,
505507
snapshot_paths=snapshot_paths,
508+
managed_schemas=managed_schemas,
506509
clean_targets=clean_targets,
507510
log_path=log_path,
508511
packages_install_path=packages_install_path,
@@ -618,6 +621,7 @@ class Project:
618621
asset_paths: List[str]
619622
target_path: str
620623
snapshot_paths: List[str]
624+
managed_schemas: List[SchemaManagementConfiguration]
621625
clean_targets: List[str]
622626
log_path: str
623627
packages_install_path: str
@@ -695,6 +699,7 @@ def to_project_config(self, with_packages=False):
695699
"asset-paths": self.asset_paths,
696700
"target-path": self.target_path,
697701
"snapshot-paths": self.snapshot_paths,
702+
"managed-schemas": [schema.to_dict() for schema in self.managed_schemas],
698703
"clean-targets": self.clean_targets,
699704
"log-path": self.log_path,
700705
"quoting": self.quoting,

core/dbt/config/runtime.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ def from_parts(
148148
asset_paths=project.asset_paths,
149149
target_path=project.target_path,
150150
snapshot_paths=project.snapshot_paths,
151+
managed_schemas=project.managed_schemas,
151152
clean_targets=project.clean_targets,
152153
log_path=project.log_path,
153154
packages_install_path=project.packages_install_path,

core/dbt/contracts/project.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
HyphenatedDbtClassMixin,
88
ExtensibleDbtClassMixin,
99
register_pattern,
10+
StrEnum,
1011
)
1112
from dataclasses import dataclass, field
1213
from typing import Optional, List, Dict, Union, Any
@@ -181,6 +182,19 @@ class RegistryPackageMetadata(
181182
}
182183

183184

185+
class PruneModelsAction(StrEnum):
186+
SKIP = "skip"
187+
DROP = "drop"
188+
WARN = "warn"
189+
190+
191+
@dataclass
class SchemaManagementConfiguration(HyphenatedDbtClassMixin, Replaceable):
    """A single entry of the project's `managed-schemas` list.

    Every field is optional and defaults to None.
    """

    # Database that contains the managed schema.
    database: Optional[str] = None
    # Name of the managed schema.
    schema: Optional[str] = None
    # Action to take for relations that have no corresponding model.
    prune_models: Optional[PruneModelsAction] = None
196+
197+
184198
@dataclass
185199
class Project(HyphenatedDbtClassMixin, Replaceable):
186200
name: Identifier
@@ -198,6 +212,7 @@ class Project(HyphenatedDbtClassMixin, Replaceable):
198212
asset_paths: Optional[List[str]] = None
199213
target_path: Optional[str] = None
200214
snapshot_paths: Optional[List[str]] = None
215+
managed_schemas: Optional[List[SchemaManagementConfiguration]] = None
201216
clean_targets: Optional[List[str]] = None
202217
profile: Optional[str] = None
203218
log_path: Optional[str] = None

core/dbt/task/manage.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# coding=utf-8
2+
from typing import Dict, Set, Tuple
3+
4+
from .compile import CompileTask
5+
from .runnable import ManifestTask
6+
from dbt.exceptions import warn_or_error, ValidationException
7+
from dbt.adapters.factory import get_adapter
8+
from dbt.contracts.graph.parsed import (
9+
ParsedModelNode,
10+
)
11+
from dbt.contracts.project import PruneModelsAction
12+
13+
14+
class ManageTask(CompileTask):
    """Task that prunes relations from the project's managed schemas.

    For every entry in `config.managed_schemas` the task lists the relations
    that exist in that schema, subtracts the relations that belong to models
    in the manifest, and then warns about or drops the remainder depending on
    the entry's `prune_models` setting.
    """

    def run(self):
        """Initialise the manifest, warm the adapter cache and prune models."""
        ManifestTask._runtime_initialize(self)
        models_in_codebase = self.manifest.nodes.keys()
        adapter = get_adapter(self.config)

        with adapter.connection_named("master"):
            required_schemas = self.get_model_schemas(adapter, models_in_codebase)
            self.populate_adapter_cache(adapter, required_schemas)

            # Kept inside the connection block so that listing and dropping
            # relations run on the "master" connection opened above.
            adapter.clear_transaction()
            self._prune_models(adapter)

    def _prune_models(self, adapter):
        """Warn about or drop relations in managed schemas that no model owns.

        Args:
            adapter: the adapter used to list and drop relations; its
                credentials supply the default database and schema for
                managed-schema entries that leave them unset.

        Raises:
            ValidationException: if two managed-schema entries resolve to the
                same (database, schema) pair.
        """
        if not self.config.managed_schemas:
            warn_or_error("No schema's configured to manage")
            return

        # Default 'database' + 'schema' of the active target.
        creds = adapter.connections.profile.credentials
        default_database, default_schema = creds.database, creds.schema

        self._assert_schema_uniqueness(default_database, default_schema)

        # Match on the node's own database/schema/alias, because those are
        # what get compared with the relations listed from the database below.
        # NOTE(review): the `node.config.*` values used previously are
        # presumably only set when the user overrides them, which would make
        # models with default naming look unused - confirm.
        models_in_codebase: Set[Tuple[str, str, str]] = {
            (node.database, node.schema, node.alias)
            for node in self.manifest.nodes.values()
            if isinstance(node, ParsedModelNode)
        }

        for config in self.config.managed_schemas:
            database = config.database or default_database
            schema = config.schema or default_schema
            # The action is fixed per managed schema, so resolve it once.
            target_action = config.prune_models or PruneModelsAction.SKIP

            models_in_database: Dict[Tuple[str, str, str], object] = {
                (database, schema, relation.identifier): relation
                for relation in adapter.list_relations(database, schema)
            }
            if not models_in_database:
                warn_or_error(f"No objects in managed schema '{database}.{schema}'")
                continue

            should_act_upon = models_in_database.keys() - models_in_codebase

            # Sorted so that warnings and drops happen in a stable order.
            for target in sorted(should_act_upon):
                target_database, target_schema, target_identifier = target
                if target_action == PruneModelsAction.WARN:
                    warn_or_error(
                        f"Found unused model {target_database}.{target_schema}.{target_identifier}"
                    )
                elif target_action == PruneModelsAction.DROP:
                    adapter.drop_relation(models_in_database[target])

    def _assert_schema_uniqueness(self, default_database=None, default_schema=None):
        """Ensure no (database, schema) pair is configured more than once.

        Args:
            default_database: database substituted for entries without one.
            default_schema: schema substituted for entries without one.

        Entries are compared after substituting the defaults, so an entry that
        omits the database and one that spells out the default database are
        recognised as the same schema.

        Raises:
            ValidationException: on the first duplicate pair found.
        """
        seen = set()

        for config in self.config.managed_schemas:
            schema = (
                config.database or default_database,
                config.schema or default_schema,
            )
            if schema in seen:
                raise ValidationException(f"Duplicate schema found: {schema}")
            seen.add(schema)

    def interpret_results(self, results):
        """Report success regardless of `results`."""
        return True

core/dbt/task/run.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,11 @@
2121
from dbt.contracts.graph.nodes import HookNode, ResultNode
2222
from dbt.contracts.results import NodeStatus, RunResult, RunStatus, RunningStatus, BaseResult
2323
from dbt.exceptions import (
24-
CompilationError,
25-
DbtInternalError,
26-
MissingMaterializationError,
27-
DbtRuntimeError,
28-
DbtValidationError,
24+
CompilationException,
25+
InternalException,
26+
RuntimeException,
27+
ValidationException,
28+
missing_materialization,
2929
)
3030
from dbt.events.functions import fire_event, get_invocation_id
3131
from dbt.events.types import (

core/dbt/tests/fixtures/project.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,11 @@ def get_tables_in_schema(self):
464464
result = self.run_sql(sql, fetch="all")
465465
return {model_name: materialization for (model_name, materialization) in result}
466466

467+
def update_models(self, models: dict):
    """Replace the test project's models with `models`.

    Removes the `models` directory under the project root, then writes
    `models` there through `write_project_files`.
    """
    models_dir = self.project_root.join("models")
    models_dir.remove()
    write_project_files(self.project_root, "models", models)
471+
467472

468473
# This is the main fixture that is used in all functional tests. It pulls in the other
469474
# fixtures that are necessary to set up a dbt project, and saves some of the information
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Test schema management as introduced by https://github.com/dbt-labs/dbt-core/issues/4957

0 commit comments

Comments
 (0)