diff --git a/README.md b/README.md index 4d8e95894..03d99260d 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,20 @@ # Analytics engineering with dbt -Template repository for the projects and environment of the course: Analytics engineering with dbt +Questions to week 1 project: +Q: How many users do we have? +A: 130 + +Q: On average, how many orders do we receive per hour? +A: 7.52 + +Q: On average, how long does an order take from being placed to being delivered? +A: 3.89 + +Q: How many users have only made one purchase? Two purchases? Three+ purchases? +A: 1 order- 25, 2 orders - 28, 3+ orders - 71 + +Q: On average, how many unique sessions do we have per hour? +A: 16.33 > Please note that this sets some environment variables so if you create some new terminals please load them again. diff --git a/greenery/.gitignore b/greenery/.gitignore new file mode 100644 index 000000000..49f147cb9 --- /dev/null +++ b/greenery/.gitignore @@ -0,0 +1,4 @@ + +target/ +dbt_packages/ +logs/ diff --git a/greenery/analyses/.gitkeep b/greenery/analyses/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/dbt_project.yml b/greenery/dbt_project.yml new file mode 100644 index 000000000..617703e5e --- /dev/null +++ b/greenery/dbt_project.yml @@ -0,0 +1,42 @@ + +# Name your project! Project names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: 'greenery' +version: '1.0.0' +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. +profile: 'greenery' + +# These configurations specify where dbt should look for different types of files. +# The `model-paths` config, for example, states that models in this project can be +# found in the "models/" directory. You probably won't need to change these! 
+model-paths: ["models"] +analysis-paths: ["analyses"] +test-paths: ["tests"] +seed-paths: ["seeds"] +macro-paths: ["macros"] +snapshot-paths: ["snapshots"] + +target-path: "target" # directory which will store compiled SQL files +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_packages" + + +# Configuring models +# Full documentation: https://docs.getdbt.com/docs/configuring-models + +# In this example config, we tell dbt to build all models in the example/ directory +# as tables. These settings can be overridden in the individual model files +# using the `{{ config(...) }}` macro. +models: + greenery: + # Config indicated by + and applies to all files under models/example/ + example: + +materialized: view + + post-hook: + - "{{ grant(role='reporting') }}" + diff --git a/greenery/macros/.gitkeep b/greenery/macros/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/macros/grant_usage.sql b/greenery/macros/grant_usage.sql new file mode 100644 index 000000000..7bd5a6120 --- /dev/null +++ b/greenery/macros/grant_usage.sql @@ -0,0 +1,10 @@ +{% macro grant(role) %} + + {% set sql %} + GRANT USAGE ON SCHEMA {{ schema }} TO ROLE {{ role }}; + GRANT SELECT ON {{ this }} TO ROLE {{ role }}; + {% endset %} + + {% set table = run_query(sql) %} + +{% endmacro %} \ No newline at end of file diff --git a/greenery/macros/row_to_columns.sql b/greenery/macros/row_to_columns.sql new file mode 100644 index 000000000..cc52415e9 --- /dev/null +++ b/greenery/macros/row_to_columns.sql @@ -0,0 +1,18 @@ +{% macro row_to_columns(table_name,column_name,count_column) %} + + {% set new_columns = dbt_utils.get_column_values( + table = ref( table_name ), + column = column_name + ) %} + {% for new_column in new_columns %} + count( case when {{ column_name }} = '{{ new_column }}' + then {{count_column}} + else NULL + end ) as {{new_column}}_count + {% if not loop.last %} + , + {% endif %} + {% endfor %} + +{% endmacro %} + diff --git 
a/greenery/models/example/marts/Core/core_models.yml b/greenery/models/example/marts/Core/core_models.yml new file mode 100644 index 000000000..b7de8e906 --- /dev/null +++ b/greenery/models/example/marts/Core/core_models.yml @@ -0,0 +1,29 @@ +version: 2 + +models: + - name: dim_products + description: "Products we sell" + columns: + - name: product_guid + description: "The primary key for this table" + tests: + - unique + - not_null + + - name: dim_users + description: "Prospective and purchasing customers " + columns: + - name: user_guid + description: "The primary key for this table" + tests: + - unique + - not_null + - name: fact_orders + description: "Orders that have been placed " + columns: + - name: order_guid + description: "The primary key for this table" + tests: + - unique + - not_null + diff --git a/greenery/models/example/marts/Core/dim_products.sql b/greenery/models/example/marts/Core/dim_products.sql new file mode 100644 index 000000000..f2b651d0c --- /dev/null +++ b/greenery/models/example/marts/Core/dim_products.sql @@ -0,0 +1,16 @@ +{{ config(materialized='table') }} + +with product as ( + select + * + from {{ref('src_products')}} +) + + + +SELECT + product_guid + ,product_name + ,price as product_price + ,inventory as product_inventory +FROM product diff --git a/greenery/models/example/marts/Core/dim_users.sql b/greenery/models/example/marts/Core/dim_users.sql new file mode 100644 index 000000000..4d7bf4fdc --- /dev/null +++ b/greenery/models/example/marts/Core/dim_users.sql @@ -0,0 +1,52 @@ +{{ config(materialized='table') }} + + +with users as ( + SELECT + * + from {{ref('src_users')}} +) + +,first_order AS ( + SELECT + DISTINCT + USER_GUID, + CREATED_AT_TSTAMP_EST as FIRST_ORDER_CREATED_TSTAMP_EST + FROM {{ref('fact_orders')}} + WHERE USER_ORDER_RANK = 1 +) + +,first_delivered_order AS ( + SELECT + DISTINCT + USER_GUID, + DELIVERED_AT_TSTAMP_EST as FIRST_ORDER_DELIVERED_TSTAMP_EST + FROM {{ref('fact_orders')}} + WHERE USER_DELIVERED_RANK = 1 
+) + + +,combined AS ( + select + u.USER_GUID + ,u.FIRST_NAME + ,u.LAST_NAME + ,u.FULL_NAME + ,u.EMAIL + ,u.PHONE_NUMBER + ,u.CREATED_AT_TSTAMP_EST as SYSTEM_CREATED_TSTAMP_EST + ,u.UPDATED_AT_TSTAMP_EST AS SYSTEM_UPDATED_TSTAMP_EST + ,fo.FIRST_ORDER_CREATED_TSTAMP_EST + ,fd.FIRST_ORDER_DELIVERED_TSTAMP_EST + from users u + left join first_order fo + on u.user_guid = fo.user_guid + left join first_delivered_order fd + on u.user_guid = fd.user_guid + +) + + +SELECT +* +from combined \ No newline at end of file diff --git a/greenery/models/example/marts/Core/fact_orders.sql b/greenery/models/example/marts/Core/fact_orders.sql new file mode 100644 index 000000000..efdd06ea6 --- /dev/null +++ b/greenery/models/example/marts/Core/fact_orders.sql @@ -0,0 +1,58 @@ +{{ config(materialized='table') }} + +with int_order_table as ( + SELECT + * + from {{ref('int_fact_orders')}} +) + +,additions as ( + SELECT + + --ids + o.ORDER_GUID + ,o.USER_GUID + ,o.PROMO_GUID + ,o.ADDRESS_GUID + ,o.TRACKING_GUID + --timestamps + ,o.CREATED_AT_TSTAMP_EST + ,o.DELIVERED_AT_TSTAMP_EST + ,o.ESTIMATED_DELIVERY_TSTAMP_EST + --user + ,o.CUSTOMER_FULL_NAME + ,o.SHIP_ADDRESS + ,o.SHIP_ZIPCODE + ,o.SHIP_STATE + ,o.SHIP_COUNTRY + + --order details + ,o.SHIPPING_SERVICE + ,o.STATUS + ,case when estimated_delivery_tstamp_est < delivered_at_tstamp_est then 'Delivered Late' + when estimated_delivery_tstamp_est >= delivered_at_tstamp_est then 'Delivered On Time' + when delivered_at_tstamp_est is null and estimated_delivery_tstamp_est >= current_timestamp() then 'Pending delivery - On Time' + when delivered_at_tstamp_est is null and estimated_delivery_tstamp_est < current_timestamp() then 'Pending delivery - Late' + when estimated_delivery_tstamp_est is null then 'No estimated delivery given' + else 'NA' + end as delivery_timeframe_compliance + ,rank() over (partition by user_guid order by created_at_tstamp_est) as user_order_rank + ,case when rank() over (partition by user_guid order by 
created_at_tstamp_est desc) = 1 then TRUE else FALSE end as user_most_recent_order_flag + ,case + when delivered_at_tstamp_est is not null then rank() over (partition by user_guid order by delivered_at_tstamp_est) + end as user_delivered_rank + + --numbers + ,o.ORDER_COST + ,o.ORDER_DISCOUNT_AMT + ,o.PRE_DISCOUNT_TOTAL + ,o.CUSTOMER_SHIPPING_COST + ,o.CUSTOMER_ORDER_TOTAL + + from int_order_table o + +) + +SELECT +* +from additions \ No newline at end of file diff --git a/greenery/models/example/marts/Marketing/marketing_models.yml b/greenery/models/example/marts/Marketing/marketing_models.yml new file mode 100644 index 000000000..39a4ba658 --- /dev/null +++ b/greenery/models/example/marts/Marketing/marketing_models.yml @@ -0,0 +1,12 @@ + +version: 2 + +models: + - name: user_order_fact + description: "High level information about our users such as most recent order, how many total orders, and customer attributes " + columns: + - name: user_guid + description: "The primary key for this table" + tests: + - unique + - not_null \ No newline at end of file diff --git a/greenery/models/example/marts/Marketing/user_order_fact.sql b/greenery/models/example/marts/Marketing/user_order_fact.sql new file mode 100644 index 000000000..b567bf2f4 --- /dev/null +++ b/greenery/models/example/marts/Marketing/user_order_fact.sql @@ -0,0 +1,55 @@ +{{ config(materialized='table') }} + +with user_most_recent_orders as ( + select + user_guid, + ship_address, + ship_state, + ship_zipcode, + ship_country, + created_at_tstamp_est as last_order_tstamp_est + from {{ref('fact_orders')}} fo + where user_most_recent_order_flag = TRUE +) + +,user_info as ( + select + * + from {{ref('dim_users')}} + +) + +,user_order_history as ( + select + user_guid, + count(distinct order_guid) as order_count, + count(distinct case when delivered_at_tstamp_est is not null then order_guid end) as delivered_order_count + from {{ref('fact_orders')}} + group by 1 +) + +,combined as ( + select + ui.user_guid, + 
ui.full_name, + ui.email, + ui.phone_number, + ui.first_order_created_tstamp_est, + ui.first_order_delivered_tstamp_est, + uo.ship_address as last_ship_address, + uo.ship_state as last_ship_state, + uo.ship_zipcode as last_ship_zipcode, + uo.ship_country as last_ship_country, + uo.last_order_tstamp_est, + coalesce(oh.order_count,0) as order_count, + coalesce(oh.delivered_order_count,0) as delivered_order_count + from user_info ui + left join user_order_history oh + on ui.user_guid = oh.user_guid + left join user_most_recent_orders uo + on ui.user_guid = uo.user_guid +) + +SELECT +* +from combined \ No newline at end of file diff --git a/greenery/models/example/marts/Product/fact_page_views.sql b/greenery/models/example/marts/Product/fact_page_views.sql new file mode 100644 index 000000000..c29b24c33 --- /dev/null +++ b/greenery/models/example/marts/Product/fact_page_views.sql @@ -0,0 +1,49 @@ +{{ config(materialized='table') }} + +with website_events as ( + SELECT + * + from {{ref('src_events')}} + where lower(event_type) = 'page_view' +) + +,products as ( + SELECT + * + from {{ref('dim_products')}} +) + +,users as ( + SELECT + * + from {{ref('dim_users')}} +) + +,combined as ( + select + --about event + e.event_guid + ,e.session_guid + ,e.created_at_tstamp_est as event_tstamp_est + ,e.page_url + + --about user + ,e.user_guid + ,u.first_order_created_tstamp_est as user_first_order_tstamp_est + ,u.email as user_email + --about order + ,order_guid + + --about product + ,e.product_guid + ,p.product_name + from website_events e + left join products p + on e.product_guid = p.product_guid + left join users u + on e.user_guid = u.user_guid +) + +SELECT +* +from combined \ No newline at end of file diff --git a/greenery/models/example/marts/Product/fact_site_sessions.sql b/greenery/models/example/marts/Product/fact_site_sessions.sql new file mode 100644 index 000000000..d664ec645 --- /dev/null +++ b/greenery/models/example/marts/Product/fact_site_sessions.sql @@ -0,0 +1,51 
@@ +{{ config(materialized='table') }} + + +with src_events as ( + select + * + FROM + {{ref('src_events')}} +) + + +,dim_users as ( + SELECT + * + from {{ref('dim_users')}} +) + +,sessions as ( + +select + e.session_guid, + e.user_guid, + min(e.created_at_tstamp_est) as session_start_tstamp_est, + max(e.created_at_tstamp_est) as session_end_tstamp_est, + {{ datediff('session_start_tstamp_est', 'session_end_tstamp_est', 'second') }} as session_duration_in_s, + case + when session_duration_in_s between 0 and 9 then '0s to 9s' + when session_duration_in_s between 10 and 29 then '10s to 29s' + when session_duration_in_s between 30 and 59 then '30s to 59s' + when session_duration_in_s > 59 then '60s or more' + else null + end as session_duration_in_s_tier, + {{row_to_columns('src_events','event_type','event_guid')}} +from src_events e +{{ dbt_utils.group_by(n=2) }} + +) + + +SELECT +s.*, +u.FIRST_ORDER_CREATED_TSTAMP_EST as user_first_ordered_tstamp_est, +case + when session_end_tstamp_est < user_first_ordered_tstamp_est then 'session pre purchase' + when session_end_tstamp_est >= user_first_ordered_tstamp_est and session_start_tstamp_est <= user_first_ordered_tstamp_est then 'first purchase session' /* <= so a session starting exactly at the first-order timestamp is classified instead of falling through to NULL */ + when session_start_tstamp_est > user_first_ordered_tstamp_est then 'returning customer' + when user_first_ordered_tstamp_est is null then 'prospective customer' +end as user_type +from sessions s +left join dim_users u +on s.user_guid = u.user_guid \ No newline at end of file diff --git a/greenery/models/example/marts/Product/product_models.yml b/greenery/models/example/marts/Product/product_models.yml new file mode 100644 index 000000000..6343b332c --- /dev/null +++ b/greenery/models/example/marts/Product/product_models.yml @@ -0,0 +1,20 @@ +version: 2 + +models: + - name: fact_page_views + description: "Pages that have been viewed on the website" + columns: + - name: event_guid + description: "The primary key for this table" + tests: + - unique + - not_null + + - name: 
fact_site_sessions + description: "Site events grouped by session" + columns: + - name: session_guid + description: "The primary key for this table" + tests: + - unique + - not_null \ No newline at end of file diff --git a/greenery/models/example/staging/int_fact_orders.sql b/greenery/models/example/staging/int_fact_orders.sql new file mode 100644 index 000000000..509d5b510 --- /dev/null +++ b/greenery/models/example/staging/int_fact_orders.sql @@ -0,0 +1,66 @@ +{{ config(materialized='view') }} + +with orders as ( + SELECT + * + from {{ref('src_orders')}} +) + +,addresses as ( + SELECT + * + from {{ref('src_addresses')}} +) + +,promos as ( + SELECT + * + from {{ref('src_promos')}} +) + +,users as ( + SELECT + * + from {{ref('src_users')}} +) + +,combined as ( + SELECT + --ids + o.ORDER_GUID + ,o.USER_GUID + ,o.PROMO_GUID + ,o.ADDRESS_GUID + ,o.TRACKING_GUID + --timestamps + ,o.CREATED_AT_TSTAMP_EST + ,o.DELIVERED_AT_TSTAMP_EST + ,o.ESTIMATED_DELIVERY_TSTAMP_EST + --user + ,u.FULL_NAME AS CUSTOMER_FULL_NAME + ,a.ADDRESS AS SHIP_ADDRESS + ,a.ZIPCODE AS SHIP_ZIPCODE + ,a.STATE AS SHIP_STATE + ,a.COUNTRY AS SHIP_COUNTRY + + --order details + ,o.SHIPPING_SERVICE + ,o.STATUS + --numbers + ,o.ORDER_COST + ,coalesce(p.DISCOUNT,0) as ORDER_DISCOUNT_AMT + ,o.ORDER_COST - ORDER_DISCOUNT_AMT as PRE_DISCOUNT_TOTAL + ,o.SHIPPING_COST as CUSTOMER_SHIPPING_COST + ,o.ORDER_TOTAL AS CUSTOMER_ORDER_TOTAL + from orders o + left join users u + on o.user_guid = u.user_guid + left join addresses a + on o.address_guid = a.address_guid + left join promos p + on o.promo_guid = p.promo_guid +) + +select +* +from combined \ No newline at end of file diff --git a/greenery/models/example/staging/postgres/_postgres__sources.yml b/greenery/models/example/staging/postgres/_postgres__sources.yml new file mode 100644 index 000000000..1dbc1dc9b --- /dev/null +++ b/greenery/models/example/staging/postgres/_postgres__sources.yml @@ -0,0 +1,43 @@ +version: 2 + +sources: + + - name: postgres + schema: 
public + database: raw + + quoting: + database: false + schema: false + identifier: false + + tables: + - name: addresses + description: > + Addresses of our users + - name: events + loaded_at_field: created_at + description: > + Website event data + - name: orders + loaded_at_field: created_at + description: > + Orders Placed + - name: order_items + description: > + Product ids for the products of each order that was made + - name: products + description: > + Products sold + - name: promos + description: > + Promos run + - name: users + loaded_at_field: created_at + description: > + Users at Greenery + + + + + diff --git a/greenery/models/example/staging/postgres/_postgres_models.yml b/greenery/models/example/staging/postgres/_postgres_models.yml new file mode 100644 index 000000000..e248caf02 --- /dev/null +++ b/greenery/models/example/staging/postgres/_postgres_models.yml @@ -0,0 +1,60 @@ +version: 2 + +models: + - name: src_addresses + description: "Addresses of our users" + columns: + - name: address_guid + description: "The primary key for this table" + tests: + - unique + - not_null + - name: src_events + description: "Website event data" + columns: + - name: event_guid + description: "The primary key for this table" + tests: + - unique + - not_null + - name: src_orders + description: "Orders Placed" + columns: + - name: order_guid + description: "The primary key for this table" + tests: + - unique + - not_null + - name: src_order_items + description: "Product ids for the products of each order that was made" + columns: + - name: order_item_id + description: "The primary key for this table" + tests: + - unique + - not_null + - name: src_products + description: "Products sold" + columns: + - name: product_guid + description: "The primary key for this table" + tests: + - unique + - not_null + - name: src_promos + description: "Promos run" + columns: + - name: promo_guid + description: "The primary key for this table" + tests: + - unique + - not_null + - name: 
src_users + description: "Users at Greenery" + columns: + - name: user_guid + description: "The primary key for this table" + tests: + - unique + - not_null + \ No newline at end of file diff --git a/greenery/models/example/staging/postgres/src_addresses.sql b/greenery/models/example/staging/postgres/src_addresses.sql new file mode 100644 index 000000000..d10fa71e4 --- /dev/null +++ b/greenery/models/example/staging/postgres/src_addresses.sql @@ -0,0 +1,17 @@ +{{ config(materialized='view') }} + + +with addresses as ( + select + address_id as address_guid, + address, + lpad(zipcode::varchar,5,'0') as zipcode, /* left-pad to 5 characters with '0'; explicit varchar cast and string pad avoid implicit type coercion */ + state, + country + from {{source('postgres','addresses')}} +) + + +SELECT +* +from addresses \ No newline at end of file diff --git a/greenery/models/example/staging/postgres/src_events.sql b/greenery/models/example/staging/postgres/src_events.sql new file mode 100644 index 000000000..29ba92641 --- /dev/null +++ b/greenery/models/example/staging/postgres/src_events.sql @@ -0,0 +1,23 @@ + + +{{ config(materialized='view') }} + + +with events as ( + select + event_id as event_guid, + session_id as session_guid, + user_id as user_guid, + page_url, + convert_timezone('America/New_York',created_at) as created_at_tstamp_est, + event_type, + order_id as order_guid, + product_id as product_guid + from {{source('postgres','events')}} + + +) + +SELECT +* +from events \ No newline at end of file diff --git a/greenery/models/example/staging/postgres/src_order_items.sql b/greenery/models/example/staging/postgres/src_order_items.sql new file mode 100644 index 000000000..fea4800ab --- /dev/null +++ b/greenery/models/example/staging/postgres/src_order_items.sql @@ -0,0 +1,16 @@ + + +{{ config(materialized='view') }} + +with order_items as ( + select + iff(order_id is null or product_id is null, null, abs(hash(concat(order_id,product_id)))) as order_item_id, + order_id as order_guid, + product_id as product_guid, + quantity + from {{source('postgres','order_items')}} +) + +SELECT +* 
+from order_items \ No newline at end of file diff --git a/greenery/models/example/staging/postgres/src_orders.sql b/greenery/models/example/staging/postgres/src_orders.sql new file mode 100644 index 000000000..aa924c487 --- /dev/null +++ b/greenery/models/example/staging/postgres/src_orders.sql @@ -0,0 +1,25 @@ + +{{ config(materialized='view') }} + + +with orders as ( + select + order_id as order_guid, + user_id as user_guid, + promo_id as promo_guid, + address_id as address_guid, + convert_timezone('America/New_York',created_at) as created_at_tstamp_est, + order_cost, + shipping_cost, + order_total, + tracking_id as tracking_guid, + shipping_service, + convert_timezone('America/New_York',estimated_delivery_at) as estimated_delivery_tstamp_est, + convert_timezone('America/New_York',delivered_at) as delivered_at_tstamp_est, + status + from {{source('postgres','orders')}} +) + +SELECT +* +from orders diff --git a/greenery/models/example/staging/postgres/src_products.sql b/greenery/models/example/staging/postgres/src_products.sql new file mode 100644 index 000000000..12d5962e0 --- /dev/null +++ b/greenery/models/example/staging/postgres/src_products.sql @@ -0,0 +1,16 @@ + + +{{ config(materialized='view') }} + +with products as ( + select + product_id as product_guid, + name as product_name, + price, + inventory + from {{source('postgres','products')}} +) + +SELECT +* +from products \ No newline at end of file diff --git a/greenery/models/example/staging/postgres/src_promos.sql b/greenery/models/example/staging/postgres/src_promos.sql new file mode 100644 index 000000000..64571fdda --- /dev/null +++ b/greenery/models/example/staging/postgres/src_promos.sql @@ -0,0 +1,15 @@ + +{{ config(materialized='view') }} + +with promos as ( + select + promo_id as promo_guid, + discount, + status + from {{source('postgres','promos')}} +) + + +SELECT +* +from promos \ No newline at end of file diff --git a/greenery/models/example/staging/postgres/src_users.sql 
b/greenery/models/example/staging/postgres/src_users.sql new file mode 100644 index 000000000..67049321b --- /dev/null +++ b/greenery/models/example/staging/postgres/src_users.sql @@ -0,0 +1,21 @@ + +{{ config(materialized='view') }} + +with users as ( + select + user_id as user_guid, + first_name, + last_name, + concat(first_name,' ',last_name) as full_name, + email, + phone_number, + convert_timezone('America/New_York',created_at) as created_at_tstamp_est, + convert_timezone('America/New_York',updated_at) as updated_at_tstamp_est, + address_id as address_guid +from {{source('postgres','users')}} + +) + +SELECT +* +from users \ No newline at end of file diff --git a/greenery/packages.yml b/greenery/packages.yml new file mode 100644 index 000000000..474d5940e --- /dev/null +++ b/greenery/packages.yml @@ -0,0 +1,3 @@ +packages: + - package: dbt-labs/dbt_utils + version: 0.9.2 \ No newline at end of file diff --git a/greenery/seeds/.gitkeep b/greenery/seeds/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/snapshots/.gitkeep b/greenery/snapshots/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/snapshots/orders_ss.sql b/greenery/snapshots/orders_ss.sql new file mode 100644 index 000000000..f202e10db --- /dev/null +++ b/greenery/snapshots/orders_ss.sql @@ -0,0 +1,31 @@ + +{% snapshot orders_ss %} + +{{ + config( + target_database = target.database, + target_schema = target.schema, + strategy='check', + unique_key='order_guid', + check_cols=['status'], + ) +}} + + +select + order_id as order_guid, + user_id as user_guid, + promo_id as promo_guid, + address_id as address_guid, + convert_timezone('America/New_York',created_at) as created_at_tstamp_est, + order_cost, + shipping_cost, + order_total, + tracking_id as tracking_guid, + shipping_service, + convert_timezone('America/New_York',estimated_delivery_at) as estimated_delivery_tstamp_est, + convert_timezone('America/New_York',delivered_at) as 
delivered_at_tstamp_est, + status + from {{ source('postgres', 'orders') }} + +{% endsnapshot %} diff --git a/greenery/tests/.gitkeep b/greenery/tests/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/week 1 readme.md b/greenery/week 1 readme.md new file mode 100644 index 000000000..857b782d3 --- /dev/null +++ b/greenery/week 1 readme.md @@ -0,0 +1,91 @@ +Welcome to your new dbt project! + +### Using the starter project + +Questions to week 1 project: +Q: How many users do we have? +A: 130 + + select + count(distinct user_guid) + from DEV_DB.DBT_BAVERY.SRC_USERS; + + +Q: On average, how many orders do we receive per hour? +A: 7.52 + + + with orders as ( + select + date_trunc('hour',created_at_tstamp_est) as hour, + count(distinct order_guid) as orders + from DEV_DB.DBT_BAVERY.SRC_ORDERS + group by 1 + ) + + select + avg(orders) as avg + from orders + +Q: On average, how long does an order take from being placed to being delivered? +A: 3.89 + + with dates as ( + select + datediff('day',created_at_tstamp_est,delivered_at_tstamp_est) as days, + order_guid + from DEV_DB.DBT_BAVERY.SRC_ORDERS + ) + + select + avg(days) + from dates; + + + +Q: How many users have only made one purchase? Two purchases? Three+ purchases? +A: 1 order- 25, 2 orders - 28, 3+ orders - 71 + + + + with user_order as ( + select + user_guid, + count(distinct order_guid) as orders + from DEV_DB.DBT_BAVERY.SRC_ORDERS + GROUP BY 1 + ) + + select + case + when orders >= 3 then '3+' + else orders::text + end as orders, + count(distinct user_guid) as users + from user_order + group by 1 + order by 1 + +Q: On average, how many unique sessions do we have per hour? 
+A: 16.33 + + with sessions as ( + select + date_trunc('hour',created_at_tstamp_est) as hour, + count(distinct session_guid) as sessions + from DEV_DB.DBT_BAVERY.SRC_EVENTS + group by 1 + + ) + + select + avg(sessions) + from sessions + + +### Resources: +- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) +- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers +- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support +- Find [dbt events](https://events.getdbt.com) near you +- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices diff --git a/greenery/week 2 readme.md b/greenery/week 2 readme.md new file mode 100644 index 000000000..9ad85b9b5 --- /dev/null +++ b/greenery/week 2 readme.md @@ -0,0 +1,31 @@ +1: What is our user repeat rate? +Answer: 79.8% + + select + count(distinct case when order_count >= 2 then user_guid end) as two_or_more, + count(distinct case when order_count >= 1 then user_guid end) as has_ordered, + two_or_more/has_ordered + from dev_db.dbt_bavery.user_order_fact + +2: What are good indicators of a user who will likely purchase again? What about indicators of users who are likely NOT to purchase again? If you had more data, what features would you want to look into to answer this question? + +Answer: +Likely purchase again: active on our site, have frequently purchased +Likely to not purchase again: haven't purchased for a while +If I had more data I would want to look at what retention behavior there is for different products, what marketing channel they came in through, what retention is based off of marketing efforts, etc + +3: Explain the marts models you added. Why did you organize the models in the way you did? + +Answer: I started with fact orders and then worked from there. 
I brought in some of the raw data from some of my source tables for the fact orders table and then I referenced the orders table on some of my other tables to get info on order frequency and things like that + +4: Which orders changed from week 1 to week 2? + +Answer: 3 orders changed from preparing to shipped + + select order_guid, + status, + dbt_valid_from, + dbt_valid_to, + rank() over (partition by order_guid order by dbt_valid_from) as row_rank + from dev_db.dbt_bavery.orders_ss + qualify count(*) over (partition by order_guid) > 1 \ No newline at end of file diff --git a/greenery/week 3 readme.md b/greenery/week 3 readme.md new file mode 100644 index 000000000..fa52fb23a --- /dev/null +++ b/greenery/week 3 readme.md @@ -0,0 +1,72 @@ + + +**1: What is our overall conversion rate?** +**answer:** 62.5% + + select + count(distinct session_guid) as total_sessions, + count(distinct case when order_guid is not null then session_guid else null end) as conversions, + conversions/total_sessions + from dev_db.dbt_bavery.src_events + + +**2: What is our conversion rate by product?** +**answer:** Query for conversion rate by product + + with products_ordered as ( + select + oli.order_guid, + p.product_name + from dev_db.dbt_bavery.src_order_items oli + left join dev_db.dbt_bavery.dim_products p + on oli.product_guid = p.product_guid + order by oli.order_guid + + ) + + ,converted_sessions as ( + select + pd.product_name, + session_guid + from dev_db.dbt_bavery.src_events e + join products_ordered pd + on e.order_guid = pd.order_guid + group by 1,2 + + ) + + ,all_sessions as ( + select + product_name, + session_guid + from dev_db.dbt_bavery.fact_page_views e + group by 1,2 + ) + + select + s.product_name, + count(distinct s.session_guid) as all_sessions, + count(distinct cs.session_guid) as converted_sessions, + converted_sessions/all_sessions as conversion_rate + from all_sessions s + left join converted_sessions cs --- just in case there's some weirdness where products 
are bought but + on s.product_name = cs.product_name + and s.session_guid = cs.session_guid + group by 1 + order by 4 desc + + + +**3: Why do some products convert higher than others?** +**answer:** Page load times, different images, the product itself may require more thought so someone may come to the site more times before buying. + + +**4: What orders changed from week 2 to 3?** +**answer:** +e24985f3-2fb3-456e-a1aa-aaf88f490d70 +8385cfcd-2b3f-443a-a676-9756f7eb5404 +5741e351-3124-4de7-9dff-01a448e7dfd4 + + select distinct order_guid + from dev_db.dbt_bavery.orders_ss + where dbt_valid_from::Date = '2022-10-24' or dbt_valid_to::Date ='2022-10-24' \ No newline at end of file