Download Revising Data Warehouse Population Scripts and more Lecture notes Data Warehousing in PDF only on Docsity!
Revising Regular Population Script
USE dw;
/* CUSTOMER_DIM POPULATION
TRUNCATE customer_stg;
LOAD DATA INFILE 'customer.csv'
INTO TABLE customer_stg
FIELDS TERMINATED BY ','
OPTIONALLY ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 LINES
( customer_number
, customer_name
, customer_street_address
, customer_zip_code
, customer_city
, customer_state
, shipping_address
, shipping_zip_code
, shipping_city
, shipping_state )
Revising Regular Population Script(01)
- Applying SCD 2 ON ADDRESSES
UPDATE
customer_dim a
, customer_stg b
SET
a.expiry_date = SUBDATE(CURRENT_DATE, 1)
WHERE
a.customer_number = b.customer_number
AND ( a.customer_street_address <> b.customer_street_address
OR a.customer_city <> b.customer_city
OR a.customer_zip_code <> b.customer_zip_code
OR a.customer_state <> b.customer_state
OR a.shipping_address <> b.shipping_address
OR a.shipping_city <> b.shipping_city
OR a.shipping_zip_code <> b.shipping_zip_code
OR a.shipping_state <> b.shipping_state
OR a.shipping_address IS NULL
OR a.shipping_city IS NULL
OR a.shipping_zip_code IS NULL
OR a.shipping_state IS NULL)
AND expiry_date = '9999-12-31'
Revising Regular Population Script(03)
- SCD 1 ON NAME UPDATE customer_dim a, customer_stg b SET a.customer_name = b.customer_name WHERE a.customer_number = b.customer_number AND a.expiry_date = '9999-12-31' AND a.customer_name <> b.customer_name ;
Revising Regular Population Script(04)
INSERT INTO customer_dim
SELECT
NULL
, customer_number
, customer_name
, customer_street_address
, customer_zip_code
, customer_city
, customer_state
, shipping_address
, shipping_zip_code
, shipping_city
, shipping_state
, CURRENT_DATE
FROM customer_stg
WHERE customer_number NOT IN(
SELECT a.customer_number
FROM
customer_dim a
, customer_stg b
WHERE b.customer_number = a.customer_number )
Revising Regular Population Script(06)
- SCD2 ON PRODUCT NAME AND GROUP UPDATE product_dim a , product_stg b SET expiry_date = SUBDATE(CURRENT_DATE, 1) WHERE a.product_code = b.product_code AND ( a.product_name <> b.product_name OR a.product_category <> b.product_category ) AND expiry_date = '9999-12-31' ; INSERT INTO product_dim SELECT NULL , b.product_code , b.product_name , b.product_category , CURRENT_DATE , '9999-12-31' FROM product_dim a , product_stg b WHERE a.product_code = b.product_code AND ( a.product_name <> b.product_name OR a.product_category <> b.product_category ) AND EXISTS ( SELECT * FROM product_dim x WHERE b.product_code = x.product_code AND a.expiry_date = SUBDATE(CURRENT_DATE, 1)) AND NOT EXISTS ( SELECT * FROM product_dim y WHERE b.product_code = y.product_code AND y.expiry_date = '9999-12-31') ;
Revising Regular Population Script(07)
- ADD NEW PRODUCT INSERT INTO product_dim SELECT NULL , product_code , product_name , product_category , CURRENT_DATE , '9999-12-31' FROM product_stg WHERE product_code NOT IN( SELECT y.product_code FROM product_dim x, product_stg y WHERE x.product_code = y.product_code ) ;
Revising Regular Population Script(09)
- SALES_ORDER_FACT POPULATION
INSERT INTO sales_order_fact
SELECT
order_sk
, customer_sk
, product_sk
, date_sk
, order_amount
, order_quantity
FROM
source.sales_order a
, order_dim b
, customer_dim c
, product_dim d
, date_dim e
WHERE
a.order_number = b.order_number
AND a.customer_number = c.customer_number
AND a.order_date >= c.effective_date
AND a.order_date <= c.expiry_date
AND a.product_code = d.product_code
AND a.order_date >= d.effective_date
AND a.order_date <= d.expiry_date
AND a.order_date = e.date
AND a.entry_date = CURRENT_DATE
Creating the script File
• Save all these Commands in text file as
dw_regular_10.sql
Testing(01)
• Adding the order_quantity column to the
sales_order table
USE source;
ALTER TABLE sales_order
ADD order_quantity INT AFTER order_amount
Testing(02)
• USE source;
• INSERT INTO sales_order VALUES
Confirm the revised regular population
- select customer_number no, customer_name name, -> shipping_city, shipping_zip_code zip, shipping_state st, -> effective_date eff, expiry_date exp -> from customer_dim \G
- As a result of the above query you will see 18 rows in set
confirm the revised regular population(01)
- To confirm that the sales data has been populated successfully, query the sales_order_fact table using this statement. - mysql> select order_sk o_sk, customer_sk c_sk, product_sk p_sk, order_date_sk od_sk, -> order_amount amt, order_quantity qty -> from sales_order_fact;
- As the result of the above query you will see 44 rows in set
Adding new Column to date_dim table
- add the promo_ind column in the date_dim table. USE dw; ALTER TABLE date_dim ADD promo_ind CHAR(1) AFTER year ;
- Checking date_dim select * from date_dim where promo_ind IS NOT NULL;
- The result of th above query should be Empty set
Creating script for Populating the promotion indicator
- Write the commands in text file and Save it as on_demand.sql
USE dw;
TRUNCATE promo_schedule_stg;
LOAD DATA INFILE 'promo_schedule.csv'
INTO TABLE promo_schedule_stg
FIELDS TERMINATED BY ','
OPTIONALLY ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 LINES
( promo_code
, promo_name
, promo_start_date
, promo_last_date )
UPDATE
date_dim a
, promo_schedule_stg b
SET a.promo_ind = 'Y'
WHERE
a.date >= b.promo_start_date
AND a.date <= b.promo_last_date