Skip to content

Commit 0e6457c

Browse files
committed
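infra updates: comment out the original job tasks in retail-job/databricks.yml and re-add them with ".py" appended to each notebook_path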
1 parent 8c3c109 commit 0e6457c

File tree

2 files changed

+151
-71
lines changed

2 files changed

+151
-71
lines changed

.github/workflows/databricks-dab.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ jobs:
172172
run: |
173173
databricks bundle validate -t ${{ steps.set-env.outputs.environment }}
174174
echo "✅ Bundle validation passed!"
175+
175176
# deploy-prod:
176177
# name: Deploy to Production
177178
# runs-on: ubuntu-latest

retail-job/databricks.yml

Lines changed: 150 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -28,84 +28,163 @@ resources:
2828
num_workers: 2
2929

3030
tasks:
31-
- task_key: "ingest_orders_task"
32-
description: "Create orders bronze table from source data"
33-
job_cluster_key: "main_cluster"
34-
notebook_task:
35-
notebook_path: "./Task Files/01_data_ingestion/orders_table_creation"
36-
source: WORKSPACE
31+
# - task_key: "ingest_orders_task"
32+
# description: "Create orders bronze table from source data"
33+
# job_cluster_key: "main_cluster"
34+
# notebook_task:
35+
# notebook_path: "./Task Files/01_data_ingestion/orders_table_creation"
36+
# source: WORKSPACE
3737

38-
- task_key: "ingest_sales_task"
39-
description: "Create sales bronze table from source data"
40-
job_cluster_key: "main_cluster"
41-
notebook_task:
42-
notebook_path: "./Task Files/01_data_ingestion/sales_table_creation"
43-
source: WORKSPACE
38+
# - task_key: "ingest_sales_task"
39+
# description: "Create sales bronze table from source data"
40+
# job_cluster_key: "main_cluster"
41+
# notebook_task:
42+
# notebook_path: "./Task Files/01_data_ingestion/sales_table_creation"
43+
# source: WORKSPACE
4444

45-
- task_key: "load_customers_task"
46-
description: "Load and create customers bronze table"
47-
job_cluster_key: "main_cluster"
48-
notebook_task:
49-
notebook_path: "./Task Files/02_data_loading/customers_table_creation"
50-
source: WORKSPACE
51-
depends_on:
52-
- task_key: "ingest_orders_task"
53-
- task_key: "ingest_sales_task"
45+
# - task_key: "load_customers_task"
46+
# description: "Load and create customers bronze table"
47+
# job_cluster_key: "main_cluster"
48+
# notebook_task:
49+
# notebook_path: "./Task Files/02_data_loading/customers_table_creation"
50+
# source: WORKSPACE
51+
# depends_on:
52+
# - task_key: "ingest_orders_task"
53+
# - task_key: "ingest_sales_task"
5454

55-
- task_key: "join_customers_sales_task"
56-
description: "Join customers and sales data"
57-
job_cluster_key: "main_cluster"
58-
notebook_task:
59-
notebook_path: "./Task Files/03_data_processing/join_customers_sales"
60-
source: WORKSPACE
61-
depends_on:
62-
- task_key: "load_customers_task"
55+
# - task_key: "join_customers_sales_task"
56+
# description: "Join customers and sales data"
57+
# job_cluster_key: "main_cluster"
58+
# notebook_task:
59+
# notebook_path: "./Task Files/03_data_processing/join_customers_sales"
60+
# source: WORKSPACE
61+
# depends_on:
62+
# - task_key: "load_customers_task"
6363

64-
- task_key: "join_customers_orders_task"
65-
description: "Join customers and orders data"
66-
job_cluster_key: "main_cluster"
67-
notebook_task:
68-
notebook_path: "./Task Files/03_data_processing/join_customers_orders"
69-
source: WORKSPACE
70-
depends_on:
71-
- task_key: "load_customers_task"
64+
# - task_key: "join_customers_orders_task"
65+
# description: "Join customers and orders data"
66+
# job_cluster_key: "main_cluster"
67+
# notebook_task:
68+
# notebook_path: "./Task Files/03_data_processing/join_customers_orders"
69+
# source: WORKSPACE
70+
# depends_on:
71+
# - task_key: "load_customers_task"
7272

73-
- task_key: "remove_duplicates_task"
74-
description: "Remove duplicate records from processed data"
75-
job_cluster_key: "main_cluster"
76-
notebook_task:
77-
notebook_path: "./Task Files/03_data_processing/remove_duplicates"
78-
source: WORKSPACE
79-
depends_on:
80-
- task_key: "join_customers_sales_task"
81-
condition_task:
82-
op: "EQUAL_TO"
83-
left: "{{tasks.join_customers_sales_task.values.has_duplicates}}"
84-
right: "true"
73+
# - task_key: "remove_duplicates_task"
74+
# description: "Remove duplicate records from processed data"
75+
# job_cluster_key: "main_cluster"
76+
# notebook_task:
77+
# notebook_path: "./Task Files/03_data_processing/remove_duplicates"
78+
# source: WORKSPACE
79+
# depends_on:
80+
# - task_key: "join_customers_sales_task"
81+
# condition_task:
82+
# op: "EQUAL_TO"
83+
# left: "{{tasks.join_customers_sales_task.values.has_duplicates}}"
84+
# right: "true"
8585

86-
- task_key: "transform_data_task"
87-
description: "Clean and transform data to gold layer"
88-
job_cluster_key: "main_cluster"
89-
notebook_task:
90-
notebook_path: "./Task Files/04_data_transformation/clean_and_transform"
91-
source: WORKSPACE
92-
depends_on:
93-
- task_key: "remove_duplicates_task"
86+
# - task_key: "transform_data_task"
87+
# description: "Clean and transform data to gold layer"
88+
# job_cluster_key: "main_cluster"
89+
# notebook_task:
90+
# notebook_path: "./Task Files/04_data_transformation/clean_and_transform"
91+
# source: WORKSPACE
92+
# depends_on:
93+
# - task_key: "remove_duplicates_task"
9494

95-
- task_key: "process_state_data_task"
96-
description: "Process orders data by state"
97-
job_cluster_key: "main_cluster"
98-
for_each_task:
99-
inputs: '["CA", "NY", "TX", "FL"]'
100-
task:
101-
task_key: "process_single_state"
102-
notebook_task:
103-
notebook_path: "./Task Files/05_state_processing/process_orders_by_state"
104-
source: WORKSPACE
105-
base_parameters:
106-
state: "{{item}}"
107-
depends_on:
108-
- task_key: "join_customers_orders_task"
95+
# - task_key: "process_state_data_task"
96+
# description: "Process orders data by state"
97+
# job_cluster_key: "main_cluster"
98+
# for_each_task:
99+
# inputs: '["CA", "NY", "TX", "FL"]'
100+
# task:
101+
# task_key: "process_single_state"
102+
# notebook_task:
103+
# notebook_path: "./Task Files/05_state_processing/process_orders_by_state"
104+
# source: WORKSPACE
105+
# base_parameters:
106+
# state: "{{item}}"
107+
# depends_on:
108+
# - task_key: "join_customers_orders_task"
109+
110+
- task_key: "ingest_orders_task"
111+
description: "Create orders bronze table from source data"
112+
job_cluster_key: "main_cluster"
113+
notebook_task:
114+
notebook_path: "./Task Files/01_data_ingestion/orders_table_creation.py"
115+
source: WORKSPACE
116+
117+
- task_key: "ingest_sales_task"
118+
description: "Create sales bronze table from source data"
119+
job_cluster_key: "main_cluster"
120+
notebook_task:
121+
notebook_path: "./Task Files/01_data_ingestion/sales_table_creation.py"
122+
source: WORKSPACE
123+
124+
- task_key: "load_customers_task"
125+
description: "Load and create customers bronze table"
126+
job_cluster_key: "main_cluster"
127+
notebook_task:
128+
notebook_path: "./Task Files/02_data_loading/customers_table_creation.py"
129+
source: WORKSPACE
130+
depends_on:
131+
- task_key: "ingest_orders_task"
132+
- task_key: "ingest_sales_task"
133+
134+
- task_key: "join_customers_sales_task"
135+
description: "Join customers and sales data"
136+
job_cluster_key: "main_cluster"
137+
notebook_task:
138+
notebook_path: "./Task Files/03_data_processing/join_customers_sales.py"
139+
source: WORKSPACE
140+
depends_on:
141+
- task_key: "load_customers_task"
142+
143+
- task_key: "join_customers_orders_task"
144+
description: "Join customers and orders data"
145+
job_cluster_key: "main_cluster"
146+
notebook_task:
147+
notebook_path: "./Task Files/03_data_processing/join_customers_orders.py"
148+
source: WORKSPACE
149+
depends_on:
150+
- task_key: "load_customers_task"
151+
152+
- task_key: "remove_duplicates_task"
153+
description: "Remove duplicate records from processed data"
154+
job_cluster_key: "main_cluster"
155+
notebook_task:
156+
notebook_path: "./Task Files/03_data_processing/remove_duplicates.py"
157+
source: WORKSPACE
158+
depends_on:
159+
- task_key: "join_customers_sales_task"
160+
condition_task:
161+
op: "EQUAL_TO"
162+
left: "{{tasks.join_customers_sales_task.values.has_duplicates}}"
163+
right: "true"
164+
165+
- task_key: "transform_data_task"
166+
description: "Clean and transform data to gold layer"
167+
job_cluster_key: "main_cluster"
168+
notebook_task:
169+
notebook_path: "./Task Files/04_data_transformation/clean_and_transform.py"
170+
source: WORKSPACE
171+
depends_on:
172+
- task_key: "remove_duplicates_task"
173+
174+
- task_key: "process_state_data_task"
175+
description: "Process orders data by state"
176+
job_cluster_key: "main_cluster"
177+
for_each_task:
178+
inputs: '["CA", "NY", "TX", "FL"]'
179+
task:
180+
task_key: "process_single_state"
181+
notebook_task:
182+
notebook_path: "./Task Files/05_state_processing/process_orders_by_state.py"
183+
source: WORKSPACE
184+
base_parameters:
185+
state: "{{item}}"
186+
depends_on:
187+
- task_key: "join_customers_orders_task"
109188

110189
timeout_seconds: 3600
111190
max_concurrent_runs: 1

0 commit comments

Comments
 (0)