@@ -28,84 +28,163 @@ resources:
2828 num_workers : 2
2929
3030 tasks :
31- - task_key : " ingest_orders_task"
32- description : " Create orders bronze table from source data"
33- job_cluster_key : " main_cluster"
34- notebook_task :
35- notebook_path : " ./Task Files/01_data_ingestion/orders_table_creation"
36- source : WORKSPACE
31+ # - task_key: "ingest_orders_task"
32+ # description: "Create orders bronze table from source data"
33+ # job_cluster_key: "main_cluster"
34+ # notebook_task:
35+ # notebook_path: "./Task Files/01_data_ingestion/orders_table_creation"
36+ # source: WORKSPACE
3737
38- - task_key : " ingest_sales_task"
39- description : " Create sales bronze table from source data"
40- job_cluster_key : " main_cluster"
41- notebook_task :
42- notebook_path : " ./Task Files/01_data_ingestion/sales_table_creation"
43- source : WORKSPACE
38+ # - task_key: "ingest_sales_task"
39+ # description: "Create sales bronze table from source data"
40+ # job_cluster_key: "main_cluster"
41+ # notebook_task:
42+ # notebook_path: "./Task Files/01_data_ingestion/sales_table_creation"
43+ # source: WORKSPACE
4444
45- - task_key : " load_customers_task"
46- description : " Load and create customers bronze table"
47- job_cluster_key : " main_cluster"
48- notebook_task :
49- notebook_path : " ./Task Files/02_data_loading/customers_table_creation"
50- source : WORKSPACE
51- depends_on :
52- - task_key : " ingest_orders_task"
53- - task_key : " ingest_sales_task"
45+ # - task_key: "load_customers_task"
46+ # description: "Load and create customers bronze table"
47+ # job_cluster_key: "main_cluster"
48+ # notebook_task:
49+ # notebook_path: "./Task Files/02_data_loading/customers_table_creation"
50+ # source: WORKSPACE
51+ # depends_on:
52+ # - task_key: "ingest_orders_task"
53+ # - task_key: "ingest_sales_task"
5454
55- - task_key : " join_customers_sales_task"
56- description : " Join customers and sales data"
57- job_cluster_key : " main_cluster"
58- notebook_task :
59- notebook_path : " ./Task Files/03_data_processing/join_customers_sales"
60- source : WORKSPACE
61- depends_on :
62- - task_key : " load_customers_task"
55+ # - task_key: "join_customers_sales_task"
56+ # description: "Join customers and sales data"
57+ # job_cluster_key: "main_cluster"
58+ # notebook_task:
59+ # notebook_path: "./Task Files/03_data_processing/join_customers_sales"
60+ # source: WORKSPACE
61+ # depends_on:
62+ # - task_key: "load_customers_task"
6363
64- - task_key : " join_customers_orders_task"
65- description : " Join customers and orders data"
66- job_cluster_key : " main_cluster"
67- notebook_task :
68- notebook_path : " ./Task Files/03_data_processing/join_customers_orders"
69- source : WORKSPACE
70- depends_on :
71- - task_key : " load_customers_task"
64+ # - task_key: "join_customers_orders_task"
65+ # description: "Join customers and orders data"
66+ # job_cluster_key: "main_cluster"
67+ # notebook_task:
68+ # notebook_path: "./Task Files/03_data_processing/join_customers_orders"
69+ # source: WORKSPACE
70+ # depends_on:
71+ # - task_key: "load_customers_task"
7272
73- - task_key : " remove_duplicates_task"
74- description : " Remove duplicate records from processed data"
75- job_cluster_key : " main_cluster"
76- notebook_task :
77- notebook_path : " ./Task Files/03_data_processing/remove_duplicates"
78- source : WORKSPACE
79- depends_on :
80- - task_key : " join_customers_sales_task"
81- condition_task :
82- op : " EQUAL_TO"
83- left : " {{tasks.join_customers_sales_task.values.has_duplicates}}"
84- right : " true"
73+ # - task_key: "remove_duplicates_task"
74+ # description: "Remove duplicate records from processed data"
75+ # job_cluster_key: "main_cluster"
76+ # notebook_task:
77+ # notebook_path: "./Task Files/03_data_processing/remove_duplicates"
78+ # source: WORKSPACE
79+ # depends_on:
80+ # - task_key: "join_customers_sales_task"
81+ # condition_task:
82+ # op: "EQUAL_TO"
83+ # left: "{{tasks.join_customers_sales_task.values.has_duplicates}}"
84+ # right: "true"
8585
86- - task_key : " transform_data_task"
87- description : " Clean and transform data to gold layer"
88- job_cluster_key : " main_cluster"
89- notebook_task :
90- notebook_path : " ./Task Files/04_data_transformation/clean_and_transform"
91- source : WORKSPACE
92- depends_on :
93- - task_key : " remove_duplicates_task"
86+ # - task_key: "transform_data_task"
87+ # description: "Clean and transform data to gold layer"
88+ # job_cluster_key: "main_cluster"
89+ # notebook_task:
90+ # notebook_path: "./Task Files/04_data_transformation/clean_and_transform"
91+ # source: WORKSPACE
92+ # depends_on:
93+ # - task_key: "remove_duplicates_task"
9494
95- - task_key : " process_state_data_task"
96- description : " Process orders data by state"
97- job_cluster_key : " main_cluster"
98- for_each_task :
99- inputs : ' ["CA", "NY", "TX", "FL"]'
100- task :
101- task_key : " process_single_state"
102- notebook_task :
103- notebook_path : " ./Task Files/05_state_processing/process_orders_by_state"
104- source : WORKSPACE
105- base_parameters :
106- state : " {{item}}"
107- depends_on :
108- - task_key : " join_customers_orders_task"
95+ # - task_key: "process_state_data_task"
96+ # description: "Process orders data by state"
97+ # job_cluster_key: "main_cluster"
98+ # for_each_task:
99+ # inputs: '["CA", "NY", "TX", "FL"]'
100+ # task:
101+ # task_key: "process_single_state"
102+ # notebook_task:
103+ # notebook_path: "./Task Files/05_state_processing/process_orders_by_state"
104+ # source: WORKSPACE
105+ # base_parameters:
106+ # state: "{{item}}"
107+ # depends_on:
108+ # - task_key: "join_customers_orders_task"
109+
110+ - task_key : " ingest_orders_task"
111+ description : " Create orders bronze table from source data"
112+ job_cluster_key : " main_cluster"
113+ notebook_task :
114+ notebook_path : " ./Task Files/01_data_ingestion/orders_table_creation.py"
115+ source : WORKSPACE
116+
117+ - task_key : " ingest_sales_task"
118+ description : " Create sales bronze table from source data"
119+ job_cluster_key : " main_cluster"
120+ notebook_task :
121+ notebook_path : " ./Task Files/01_data_ingestion/sales_table_creation.py"
122+ source : WORKSPACE
123+
124+ - task_key : " load_customers_task"
125+ description : " Load and create customers bronze table"
126+ job_cluster_key : " main_cluster"
127+ notebook_task :
128+ notebook_path : " ./Task Files/02_data_loading/customers_table_creation.py"
129+ source : WORKSPACE
130+ depends_on :
131+ - task_key : " ingest_orders_task"
132+ - task_key : " ingest_sales_task"
133+
134+ - task_key : " join_customers_sales_task"
135+ description : " Join customers and sales data"
136+ job_cluster_key : " main_cluster"
137+ notebook_task :
138+ notebook_path : " ./Task Files/03_data_processing/join_customers_sales.py"
139+ source : WORKSPACE
140+ depends_on :
141+ - task_key : " load_customers_task"
142+
143+ - task_key : " join_customers_orders_task"
144+ description : " Join customers and orders data"
145+ job_cluster_key : " main_cluster"
146+ notebook_task :
147+ notebook_path : " ./Task Files/03_data_processing/join_customers_orders.py"
148+ source : WORKSPACE
149+ depends_on :
150+ - task_key : " load_customers_task"
151+
152+ - task_key : " remove_duplicates_task"
153+ description : " Remove duplicate records from processed data"
154+ job_cluster_key : " main_cluster"
155+ notebook_task :
156+ notebook_path : " ./Task Files/03_data_processing/remove_duplicates.py"
157+ source : WORKSPACE
158+ depends_on :
159+ - task_key : " join_customers_sales_task"
160+ condition_task :
161+ op : " EQUAL_TO"
162+ left : " {{tasks.join_customers_sales_task.values.has_duplicates}}"
163+ right : " true"
164+
165+ - task_key : " transform_data_task"
166+ description : " Clean and transform data to gold layer"
167+ job_cluster_key : " main_cluster"
168+ notebook_task :
169+ notebook_path : " ./Task Files/04_data_transformation/clean_and_transform.py"
170+ source : WORKSPACE
171+ depends_on :
172+ - task_key : " remove_duplicates_task"
173+
174+ - task_key : " process_state_data_task"
175+ description : " Process orders data by state"
176+ job_cluster_key : " main_cluster"
177+ for_each_task :
178+ inputs : ' ["CA", "NY", "TX", "FL"]'
179+ task :
180+ task_key : " process_single_state"
181+ notebook_task :
182+ notebook_path : " ./Task Files/05_state_processing/process_orders_by_state.py"
183+ source : WORKSPACE
184+ base_parameters :
185+ state : " {{item}}"
186+ depends_on :
187+ - task_key : " join_customers_orders_task"
109188
110189 timeout_seconds : 3600
111190 max_concurrent_runs : 1
0 commit comments