diff --git a/greenery/dbt_project.yml b/greenery/dbt_project.yml index ebd662674..49fb5ffc3 100644 --- a/greenery/dbt_project.yml +++ b/greenery/dbt_project.yml @@ -36,3 +36,5 @@ models: # Config indicated by + and applies to all files under models/example/ staging: +materialized: view + marts: + +materialized: table diff --git a/greenery/models/marts/core/dim_products.sql b/greenery/models/marts/core/dim_products.sql new file mode 100644 index 000000000..d0dc4b173 --- /dev/null +++ b/greenery/models/marts/core/dim_products.sql @@ -0,0 +1,10 @@ +SELECT /* product attributes, stock flags and the number of distinct orders that contain each product */ + products.product_id + , products.name as product_name + , products.price + , CASE WHEN products.inventory = 0 THEN false ELSE true END AS is_in_stock /* a NULL inventory falls through to ELSE and is reported as in stock */ + , products.inventory as number_in_stock + , COUNT(DISTINCT order_items.order_id) as number_of_orders_with_product /* 0 for products with no order items, because of the LEFT JOIN */ +FROM {{ ref('stg_products__products') }} AS products +LEFT JOIN {{ ref('stg_order_items__order_items')}} AS order_items ON products.product_id = order_items.product_id +GROUP BY 1, 2, 3, 4, 5 \ No newline at end of file diff --git a/greenery/models/marts/core/dim_users.sql b/greenery/models/marts/core/dim_users.sql new file mode 100644 index 000000000..d0dc4b173 --- /dev/null +++ b/greenery/models/marts/core/dim_users.sql @@ -0,0 +1,10 @@ +SELECT /* user attributes and order-count fields as exposed by int_users */ + users.user_id + , users.first_name + , users.last_name + , users.email + , users.phone_number + , users.user_created_at_utc + , users.is_repeat_customer + , users.number_of_user_orders +FROM {{ ref('int_users') }} AS users \ No newline at end of file diff --git a/greenery/models/marts/core/fct_shipping.sql b/greenery/models/marts/core/fct_shipping.sql new file mode 100644 index 000000000..e90679430 --- /dev/null +++ b/greenery/models/marts/core/fct_shipping.sql @@ -0,0 +1,18 @@ 
+SELECT /* shipping attributes from int_shipping plus a delivery-timeliness label and an order-to-delivery day count */ + shipping.order_id + , shipping.address_id + , shipping.address + , shipping.zipcode + , shipping.state + , shipping.country + , shipping.is_usa + , shipping.shipping_cost + , shipping.tracking_id + , shipping.shipping_service + , shipping.status + , CASE WHEN date_trunc('day', shipping.estimated_delivery_at_utc) > date_trunc('day', shipping.delivered_at_utc) THEN 'Early' /* estimate and actual delivery are compared at day granularity */ + WHEN date_trunc('day', shipping.estimated_delivery_at_utc) < date_trunc('day', shipping.delivered_at_utc) THEN 'Late' + WHEN date_trunc('day', shipping.estimated_delivery_at_utc) = date_trunc('day', shipping.delivered_at_utc) THEN 'On Time' + ELSE NULL END as delivery_timeliness /* NULL when either timestamp is NULL */ + , delivered_at_utc::date - created_at_utc::date AS days_from_order_to_delivery /* both columns come from int_shipping, the only relation in this query */ +FROM {{ ref('int_shipping') }} AS shipping \ No newline at end of file diff --git a/greenery/models/marts/core/intermediate/int_orders.sql b/greenery/models/marts/core/intermediate/int_orders.sql new file mode 100644 index 000000000..2e7d7458d --- /dev/null +++ b/greenery/models/marts/core/intermediate/int_orders.sql @@ -0,0 +1,18 @@ +SELECT /* orders from stg_orders__orders with promo details attached; the LEFT JOIN keeps orders that have no matching promo */ + orders.order_id + , orders.user_id + , orders.address_id as order_address_id + , orders.created_at_utc as order_created_at_utc + , CASE WHEN orders.promo_id IS NOT NULL THEN true ELSE false END AS has_promo_code /* based on the order's own promo_id, not on whether the join matched */ + , promos.promo_id /* NULL when no promo row matched */ + , promos.discount AS promo_discount + , orders.order_cost + , orders.shipping_cost + , orders.order_total as order_total_cost + , orders.status as order_status + , orders.tracking_id + , orders.shipping_service + , orders.estimated_delivery_at_utc + , orders.delivered_at_utc +FROM {{ ref('stg_orders__orders') }} AS orders +LEFT JOIN {{ ref('stg_promos__promos') }} AS promos ON orders.promo_id = promos.promo_id \ No newline at end of file diff --git a/greenery/models/marts/core/intermediate/int_shipping.sql b/greenery/models/marts/core/intermediate/int_shipping.sql new file mode 100644 index 000000000..d14927b3d --- /dev/null +++ 
b/greenery/models/marts/core/intermediate/int_shipping.sql @@ -0,0 +1,19 @@ +SELECT /* orders joined to their delivery address; the INNER JOIN keeps only order/address pairs that match */ + orders.order_id + , addresses.address_id + , addresses.address + /* left-pad 4-character zip codes with a zero so they are 5 characters long */ + , CASE WHEN LENGTH(addresses.zipcode::text) = 4 THEN CONCAT('0', addresses.zipcode::text) ELSE addresses.zipcode::text END AS zipcode + , addresses.state + , addresses.country + /* flag to tell United States orders from international ones */ + , CASE WHEN addresses.country = 'United States' THEN true ELSE false END AS is_usa + , orders.created_at_utc + , orders.shipping_cost + , orders.tracking_id + , orders.shipping_service + , orders.status + , orders.estimated_delivery_at_utc + , orders.delivered_at_utc +FROM {{ ref('stg_addresses__addresses') }} AS addresses +INNER JOIN {{ ref('stg_orders__orders') }} AS orders ON addresses.address_id = orders.address_id \ No newline at end of file diff --git a/greenery/models/marts/core/intermediate/int_users.sql b/greenery/models/marts/core/intermediate/int_users.sql new file mode 100644 index 000000000..3d22a78e7 --- /dev/null +++ b/greenery/models/marts/core/intermediate/int_users.sql @@ -0,0 +1,17 @@ +WITH customer_order_count AS /* distinct order count per user_id */ + (SELECT orders.user_id, COUNT(DISTINCT order_id) as number_of_user_orders + FROM {{ ref('stg_orders__orders' ) }} as orders + GROUP BY orders.user_id + ) + +SELECT /* every user from stg_users__users, with order counts; users without orders are kept by the LEFT JOIN */ + users.user_id + , CASE WHEN customer_order_count.number_of_user_orders > 1 THEN true ELSE false END AS is_repeat_customer /* a NULL count (no orders) compares as unknown and yields false */ + , COALESCE(customer_order_count.number_of_user_orders, 0) AS number_of_user_orders + , users.first_name + , users.last_name + , users.email + , users.phone_number + , users.created_at_utc as user_created_at_utc +FROM {{ ref('stg_users__users') }} AS users +LEFT JOIN customer_order_count ON users.user_id = customer_order_count.user_id \ No newline at end of file diff --git a/greenery/models/marts/core/intermediate/schema.yml b/greenery/models/marts/core/intermediate/schema.yml new file mode 100644 
index 000000000..cf7ede36f --- /dev/null +++ b/greenery/models/marts/core/intermediate/schema.yml @@ -0,0 +1,20 @@ +version: 2 + +models: + - name: int_shipping + description: shipping info related to order + columns: + - name: address_id + description: UUID for each unique address on platform + tests: + - not_null + - name: zipcode + - name: is_usa + + - name: int_users + description: User information + columns: + - name: user_id + + - name: int_orders + description: promo code with order info \ No newline at end of file diff --git a/greenery/models/marts/core/schema.yml b/greenery/models/marts/core/schema.yml new file mode 100644 index 000000000..9bbd39aa4 --- /dev/null +++ b/greenery/models/marts/core/schema.yml @@ -0,0 +1,26 @@ +version: 2 + +models: + - name: dim_products + description: this dim includes the number of orders containing a product. + columns: + - name: product_id + tests: + - not_null + - unique + + - name: fct_shipping + description: this fact table includes shipping information related to an order, including address, carrier and delivery time. + columns: + - name: order_id + tests: + - not_null + - unique + + - name: dim_users + description: this dim will have information related to first order date and most recent order detail. 
+ columns: + - name: user_id + tests: + - not_null + - unique \ No newline at end of file diff --git a/greenery/models/marts/marketing/fct_user_orders.sql b/greenery/models/marts/marketing/fct_user_orders.sql new file mode 100644 index 000000000..509a3fcbb --- /dev/null +++ b/greenery/models/marts/marketing/fct_user_orders.sql @@ -0,0 +1,25 @@ +SELECT /* orders from int_orders with the ordering user's attributes from int_users; the INNER JOIN drops orders whose user_id has no int_users row */ + orders.order_id + , orders.user_id + , users.is_repeat_customer + , users.number_of_user_orders + , users.first_name + , users.last_name + , users.email + , users.phone_number + , users.user_created_at_utc + , orders.order_address_id + , orders.order_created_at_utc + , orders.has_promo_code /* promo flag, promo_id and promo_discount are already resolved in int_orders, so stg_promos__promos is not joined again */ + , orders.promo_id + , orders.promo_discount + , orders.order_cost + , orders.shipping_cost + , orders.order_total_cost + , orders.order_status + , orders.tracking_id + , orders.shipping_service + , orders.estimated_delivery_at_utc + , orders.delivered_at_utc +FROM {{ ref('int_orders') }} AS orders +INNER JOIN {{ ref('int_users' )}} AS users ON users.user_id = orders.user_id \ No newline at end of file diff --git a/greenery/models/marts/marketing/schema.yml b/greenery/models/marts/marketing/schema.yml new file mode 100644 index 000000000..4f9b950dd --- /dev/null +++ b/greenery/models/marts/marketing/schema.yml @@ -0,0 +1,10 @@ +version: 2 + +models: + - name: fct_user_orders + description: user order information and promo information + columns: + - name: user_id + tests: + - not_null + \ No newline at end of file diff --git a/greenery/models/marts/product/fct_page_views.sql b/greenery/models/marts/product/fct_page_views.sql new file mode 100644 index 000000000..11704a5af --- /dev/null +++ b/greenery/models/marts/product/fct_page_views.sql @@ -0,0 +1,6 @@ +SELECT events_agg.session_id + , events_agg.user_id + , events_agg.page_view + , events_agg.created_at_utc + , 
events_agg.page_url +FROM {{ ref('int_session_events_agg') }} AS events_agg \ No newline at end of file diff --git a/greenery/models/marts/product/fct_sessions.sql b/greenery/models/marts/product/fct_sessions.sql new file mode 100644 index 000000000..b6e366cf6 --- /dev/null +++ b/greenery/models/marts/product/fct_sessions.sql @@ -0,0 +1,26 @@ +WITH session_length AS ( /* earliest and latest event timestamp per session */ + SELECT session_id + , MIN(created_at_utc) AS first_event + , MAX(created_at_utc) AS last_event + FROM {{ ref('int_session_events_agg') }} AS events + GROUP BY session_id +) + +SELECT events_agg.session_id /* NOTE(review): int_session_events_agg is grouped by session_id, created_at_utc, user_id and page_url, so this model is at that grain rather than one row per session - confirm intended grain */ + , events_agg.user_id + , users.first_name + , users.last_name + , users.email + , events_agg.page_view + , events_agg.add_to_cart + , events_agg.checkout + , events_agg.package_shipped + , session_length.first_event + , session_length.last_event + , (DATE_PART('DAY', session_length.last_event::timestamp - session_length.first_event::timestamp) * 24 + + DATE_PART('HOUR', session_length.last_event::timestamp - session_length.first_event::timestamp)) * 60 + + DATE_PART('MINUTE', session_length.last_event::timestamp - session_length.first_event::timestamp) + AS session_length_minutes /* whole minutes built from the day, hour and minute parts of the interval; seconds are dropped */ +FROM {{ ref('int_session_events_agg') }} AS events_agg +LEFT JOIN {{ ref('stg_users__users') }} AS users ON events_agg.user_id = users.user_id +LEFT JOIN session_length ON events_agg.session_id = session_length.session_id \ No newline at end of file diff --git a/greenery/models/marts/product/intermediate/int_session_events_agg.sql b/greenery/models/marts/product/intermediate/int_session_events_agg.sql new file mode 100644 index 000000000..f5e8a699c --- /dev/null +++ b/greenery/models/marts/product/intermediate/int_session_events_agg.sql @@ -0,0 +1,11 @@ +SELECT /* per-event-type counts for each session_id / created_at_utc / user_id / page_url combination */ + events.session_id + , events.created_at_utc + , events.user_id + , events.page_url + , SUM(CASE WHEN event_type = 'package_shipped' THEN 1 ELSE 0 END) AS package_shipped /* underscore form matches the other event_type literals; NOTE(review): confirm against the source data */ + , SUM(CASE WHEN event_type = 'page_view' THEN 1 ELSE 0 END) AS page_view + , SUM(CASE 
WHEN event_type = 'checkout' THEN 1 ELSE 0 END) AS checkout + , SUM(CASE WHEN event_type = 'add_to_cart' THEN 1 ELSE 0 END) AS add_to_cart +FROM {{ ref('stg_events__events') }} AS events +GROUP BY 1, 2, 3, 4 /* positions 1-4 are session_id, created_at_utc, user_id, page_url */ \ No newline at end of file diff --git a/greenery/models/marts/product/intermediate/schema.yml b/greenery/models/marts/product/intermediate/schema.yml new file mode 100644 index 000000000..e847ab64c --- /dev/null +++ b/greenery/models/marts/product/intermediate/schema.yml @@ -0,0 +1,5 @@ +version: 2 + +models: + - name: int_session_events_agg + description: Event-type counts grouped by session, event timestamp, user and page URL \ No newline at end of file diff --git a/greenery/models/marts/product/schema.yml b/greenery/models/marts/product/schema.yml new file mode 100644 index 000000000..7f71897f0 --- /dev/null +++ b/greenery/models/marts/product/schema.yml @@ -0,0 +1,7 @@ +version: 2 + +models: + - name: fct_sessions + description: events for related session and User information + - name: fct_page_views + description: Page view events \ No newline at end of file diff --git a/greenery/models/staging/schema.yml b/greenery/models/staging/schema.yml index 72795f6bd..4c8c6a941 100644 --- a/greenery/models/staging/schema.yml +++ b/greenery/models/staging/schema.yml @@ -13,6 +13,7 @@ models: - name: address_id - name: created_at - name: order_cost + - name: shipping_cost - name: order_total - name: tracking_id - name: shipping_service @@ -24,8 +25,6 @@ models: columns: - name: addresses_id description: Unique identifier for an address. - tests: - - unique - name: address - name: zipcode - name: state @@ -40,7 +39,7 @@ models: - name: session_id - name: user_id - name: page_url - - name: created_at + - name: created_at_utc - name: event_type - name: order_id - name: product_id @@ -49,8 +48,6 @@ models: columns: - name: order_id description: Unique identifier for an event. 
- tests: - - unique - name: product_id - name: qunantity - name: stg_products__products diff --git a/greenery/models/staging/stg_events__events.sql b/greenery/models/staging/stg_events__events.sql index a0756f9cb..7093ec2ff 100644 --- a/greenery/models/staging/stg_events__events.sql +++ b/greenery/models/staging/stg_events__events.sql @@ -15,4 +15,4 @@ final as ( from events ) -select * from events \ No newline at end of file +select * from final \ No newline at end of file diff --git a/greenery/models/staging/stg_orders__orders.sql b/greenery/models/staging/stg_orders__orders.sql index 4b8474106..e8142de36 100644 --- a/greenery/models/staging/stg_orders__orders.sql +++ b/greenery/models/staging/stg_orders__orders.sql @@ -10,6 +10,7 @@ with orders as ( address_id, created_at as created_at_utc, order_cost, + shipping_cost, order_total, tracking_id, shipping_service, diff --git a/greenery/tests/zip_code_length.sql b/greenery/tests/zip_code_length.sql new file mode 100644 index 000000000..23bf30be7 --- /dev/null +++ b/greenery/tests/zip_code_length.sql @@ -0,0 +1,7 @@ +select address_id /* singular test: returns addresses whose zipcode is not 4 or 5 characters long; any returned row is a failure */ + , zipcode + , length(zipcode::text) as zipcode_length + , state +from {{ ref('stg_addresses__addresses') }} /* ref() resolves the relation for the active target instead of a hard-coded developer schema */ +group by address_id, zipcode, state +having length(zipcode::text) NOT IN (4,5) \ No newline at end of file