In this section we are going to create SQL script for ETL process to fill the Dim and Fact tables of Data Warehouse with data from OLTP database tables. The process includes the following steps.
1. Plan and document ETL process in BI documentation excel sheet to match the respective OLTP, OLAP fields with ETL scripts.
2. Here we use Flush and Fill ETL process, so first step is to drop the foreign constraints and clear the tables. ( this is called flush the tables)
3. Truncate all the Dim and Fact tables in Data Warehouse to free the allocated space in a memory.
4. Fill the Dim and Fact tables with data from OLTP tables with casting where ever needed to match the data type between two respective tables from OLTP and OLAP.
5. Re-create the foreign key constraints on the tables in the Data Warehouse.
6. Review and confirm that data has been filled in the Dim and Fact tables in Data Warehouse solution.
The following picture shows the planning sheet of ETL process.
The below SQL script describes more detail about ETL process step by step.
/**************************************************************
Create the Data Warehouse
*************************************************************/
--****************** [DWNorthwindLite] *********************--
-- This file contains ETL code for use with the
-- [DWNorthwindLite] database.
--****************** Your Version ***************************--
USE [DWNorthwindLite]
Go
--********************************************************************--
-- 1) Drop the FOREIGN KEY CONSTRAINTS and Clear the tables
--********************************************************************--
Alter Table [DWNorthwindLite].dbo.FactOrders
Drop Constraint [FK_FactOrders_DimProducts]
Alter Table [DWNorthwindLite].dbo.FactOrders
Drop Constraint [FK_FactOrders_DimCustomers]
Alter Table [DWNorthwindLite].dbo.FactOrders
Drop Constraint [FK_FactOrders_DimDates]
Truncate Table [DWNorthwindLite].dbo.DimProducts
Truncate Table [DWNorthwindLite].dbo.DimCustomers
Truncate Table [DWNorthwindLite].dbo.DimDates
Truncate Table [DWNorthwindLite].dbo.FactOrders
--********************************************************************--
-- 2) FILL the Tables
--********************************************************************--
/****** [dbo].[DimProducts] ******/
INSERT INTO [DWNorthwindLite].dbo.DimProducts
SELECT
[ProductID] = Products.ProductID
,[ProductName] = CAST(Products.ProductName as nVarchar(100))
,[ProductCategoryID] = Products.CategoryID
,[ProductCategoryName] = CAST(Categories.CategoryName as nVarchar(100))
,[StartDate] = 20000101
,[EndDate] = Null
,[IsCurrent] = 'Yes'
FROM [NorthwindLite].dbo.Categories
INNER JOIN [NorthwindLite].dbo.Products
ON Categories.CategoryID = Products.CategoryID;
Go
/****** [dbo].[DimCustomers] ******/
INSERT INTO DimCustomers
SELECT [CustomerID] = [NorthwindLite].[dbo].[Customers].[CustomerID]
,[CustomerName] = CAST([NorthwindLite].[dbo].[Customers].[CompanyName] as NVARCHAR(100))
,[CustomerCity] = CAST([NorthwindLite].[dbo].[Customers].[City] as NVARCHAR(100))
,[CustomerCountry] = CAST([NorthwindLite].[dbo].[Customers].[Country] as nvarchar(100))
,[StartDate] = 20000101
,[EndDate] = NULL
,[IsCurrent] = 'YES'
From [NorthwindLite].[dbo].[Customers]
Go
/****** [dbo].[DimDates] ******/
DECLARE @DateKey Date
DECLARE @EndDate Date
SET @DateKey = '1/1/1950'
SET @EndDate = '12/31/2050'
WHILE (@DateKey <= @EndDate)
BEGIN
INSERT INTO DimDates
SELECT [DateKey] = Cast( Convert(nVarchar(100), @DateKey, 112) as int)
, [USADateName] = Convert(nVarchar(100), @DateKey, 110)
, [MonthKey] = Cast( Convert(nVarchar(100), DatePart(MONTH,@DateKey)) as int)
, [MonthName] = Convert(nVarchar(100), DateName(MONTH,@DateKey), 110)
, [QuarterKey] = Cast( Convert(nVarchar(100), DatePart(QUARTER,@DateKey), 110) as int)
, [QuarterName] = Convert(nVarchar(100), DateName(QUARTER,@DateKey), 110)
, [YearKey] = Cast( Convert(nVarchar(100), DatePart(Year,@DateKey), 110) as int)
, [YearName] = Convert(nVarchar(100), DateName(Year,@DateKey), 110)
SET @DateKey = DATEADD(DAY,1, @DateKey)
END
Go
/****** [dbo].[FactOrders] ******/
INSERT INTO [DWNorthwindLite].[dbo].[FactOrders]
SELECT [OrderID] = [NorthwindLite].[dbo].[Orders].[OrderID]
,[CustomerKey] = [DWNorthwindLite].[dbo].[DimCustomers].[CustomerKey]
,[OrderDateKey] = CAST(CONVERT(nvarchar(50),[NorthwindLite].[dbo].[Orders].[OrderDate], 112) as int)
,[ProductKey] = [DWNorthwindLite].[dbo].[DimProducts].[ProductKey]
,[ActualOrderUnitPrice]= [NorthwindLite].[dbo].[OrderDetails].[UnitPrice]
,[ActualOrderQuantity] = CAST([NorthwindLite].[dbo].[OrderDetails].[Quantity] as smallint)
FROM [NorthwindLite].[dbo].[Orders]
INNER JOIN
[NorthwindLite].[dbo].[OrderDetails]
ON
[NorthwindLite].[dbo].[Orders].[OrderID] = [NorthwindLite].[dbo].[OrderDetails].[OrderID]
INNER JOIN
[DimCustomers]
ON
[NorthwindLite].[dbo].[Orders].[CustomerID] = [DimCustomers].[CustomerID]
INNER JOIN
[DimProducts]
ON
[NorthwindLite].[dbo].[OrderDetails].[ProductID] = [DimProducts].[ProductID]
Go
--********************************************************************--
-- 3) Re-Create the FOREIGN KEY CONSTRAINTS
--********************************************************************--
ALTER TABLE DWNorthwindLite.dbo.FactOrders
ADD CONSTRAINT FK_FactOrders_DimProducts
FOREIGN KEY (ProductKey) REFERENCES DimProducts(ProductKey)
ALTER TABLE DWNorthwindLite.dbo.FactOrders
ADD CONSTRAINT FK_FactOrders_DimCustomers
FOREIGN KEY (CustomerKey) REFERENCES DimCustomers(CustomerKey)
ALTER TABLE DWNorthwindLite.dbo.FactOrders
ADD CONSTRAINT FK_FactOrders_DimDates
FOREIGN KEY (OrderDateKey) REFERENCES DimDates(DateKey)
--********************************************************************--
-- Review the results of this script
--********************************************************************--
Select * from [dbo].[DimProducts]
Select * from [dbo].[DimCustomers]
Select * from [dbo].[DimDates]
Select * from [dbo].[FactOrders]
This concludes the ETL process using SQL scripting for a data Warehouse solution.
1. Plan and document ETL process in BI documentation excel sheet to match the respective OLTP, OLAP fields with ETL scripts.
2. Here we use Flush and Fill ETL process, so first step is to drop the foreign constraints and clear the tables. ( this is called flush the tables)
3. Truncate all the Dim and Fact tables in Data Warehouse to free the allocated space in a memory.
4. Fill the Dim and Fact tables with data from OLTP tables with casting where ever needed to match the data type between two respective tables from OLTP and OLAP.
5. Re-create the foreign key constraints on the tables in the Data Warehouse.
6. Review and confirm that data has been filled in the Dim and Fact tables in Data Warehouse solution.
The following picture shows the planning sheet of ETL process.
The below SQL script describes more detail about ETL process step by step.
/**************************************************************
Create the Data Warehouse
*************************************************************/
--****************** [DWNorthwindLite] *********************--
-- This file contains ETL code for use with the
-- [DWNorthwindLite] database.
--****************** Your Version ***************************--
USE [DWNorthwindLite]
Go
--********************************************************************--
-- 1) Drop the FOREIGN KEY CONSTRAINTS and Clear the tables
--********************************************************************--
Alter Table [DWNorthwindLite].dbo.FactOrders
Drop Constraint [FK_FactOrders_DimProducts]
Alter Table [DWNorthwindLite].dbo.FactOrders
Drop Constraint [FK_FactOrders_DimCustomers]
Alter Table [DWNorthwindLite].dbo.FactOrders
Drop Constraint [FK_FactOrders_DimDates]
Truncate Table [DWNorthwindLite].dbo.DimProducts
Truncate Table [DWNorthwindLite].dbo.DimCustomers
Truncate Table [DWNorthwindLite].dbo.DimDates
Truncate Table [DWNorthwindLite].dbo.FactOrders
--********************************************************************--
-- 2) FILL the Tables
--********************************************************************--
/****** [dbo].[DimProducts] ******/
INSERT INTO [DWNorthwindLite].dbo.DimProducts
SELECT
[ProductID] = Products.ProductID
,[ProductName] = CAST(Products.ProductName as nVarchar(100))
,[ProductCategoryID] = Products.CategoryID
,[ProductCategoryName] = CAST(Categories.CategoryName as nVarchar(100))
,[StartDate] = 20000101
,[EndDate] = Null
,[IsCurrent] = 'Yes'
FROM [NorthwindLite].dbo.Categories
INNER JOIN [NorthwindLite].dbo.Products
ON Categories.CategoryID = Products.CategoryID;
Go
/****** [dbo].[DimCustomers] ******/
INSERT INTO DimCustomers
SELECT [CustomerID] = [NorthwindLite].[dbo].[Customers].[CustomerID]
,[CustomerName] = CAST([NorthwindLite].[dbo].[Customers].[CompanyName] as NVARCHAR(100))
,[CustomerCity] = CAST([NorthwindLite].[dbo].[Customers].[City] as NVARCHAR(100))
,[CustomerCountry] = CAST([NorthwindLite].[dbo].[Customers].[Country] as nvarchar(100))
,[StartDate] = 20000101
,[EndDate] = NULL
,[IsCurrent] = 'YES'
From [NorthwindLite].[dbo].[Customers]
Go
/****** [dbo].[DimDates] ******/
DECLARE @DateKey Date
DECLARE @EndDate Date
SET @DateKey = '1/1/1950'
SET @EndDate = '12/31/2050'
WHILE (@DateKey <= @EndDate)
BEGIN
INSERT INTO DimDates
SELECT [DateKey] = Cast( Convert(nVarchar(100), @DateKey, 112) as int)
, [USADateName] = Convert(nVarchar(100), @DateKey, 110)
, [MonthKey] = Cast( Convert(nVarchar(100), DatePart(MONTH,@DateKey)) as int)
, [MonthName] = Convert(nVarchar(100), DateName(MONTH,@DateKey), 110)
, [QuarterKey] = Cast( Convert(nVarchar(100), DatePart(QUARTER,@DateKey), 110) as int)
, [QuarterName] = Convert(nVarchar(100), DateName(QUARTER,@DateKey), 110)
, [YearKey] = Cast( Convert(nVarchar(100), DatePart(Year,@DateKey), 110) as int)
, [YearName] = Convert(nVarchar(100), DateName(Year,@DateKey), 110)
SET @DateKey = DATEADD(DAY,1, @DateKey)
END
Go
/****** [dbo].[FactOrders] ******/
INSERT INTO [DWNorthwindLite].[dbo].[FactOrders]
SELECT [OrderID] = [NorthwindLite].[dbo].[Orders].[OrderID]
,[CustomerKey] = [DWNorthwindLite].[dbo].[DimCustomers].[CustomerKey]
,[OrderDateKey] = CAST(CONVERT(nvarchar(50),[NorthwindLite].[dbo].[Orders].[OrderDate], 112) as int)
,[ProductKey] = [DWNorthwindLite].[dbo].[DimProducts].[ProductKey]
,[ActualOrderUnitPrice]= [NorthwindLite].[dbo].[OrderDetails].[UnitPrice]
,[ActualOrderQuantity] = CAST([NorthwindLite].[dbo].[OrderDetails].[Quantity] as smallint)
FROM [NorthwindLite].[dbo].[Orders]
INNER JOIN
[NorthwindLite].[dbo].[OrderDetails]
ON
[NorthwindLite].[dbo].[Orders].[OrderID] = [NorthwindLite].[dbo].[OrderDetails].[OrderID]
INNER JOIN
[DimCustomers]
ON
[NorthwindLite].[dbo].[Orders].[CustomerID] = [DimCustomers].[CustomerID]
INNER JOIN
[DimProducts]
ON
[NorthwindLite].[dbo].[OrderDetails].[ProductID] = [DimProducts].[ProductID]
Go
--********************************************************************--
-- 3) Re-Create the FOREIGN KEY CONSTRAINTS
--********************************************************************--
ALTER TABLE DWNorthwindLite.dbo.FactOrders
ADD CONSTRAINT FK_FactOrders_DimProducts
FOREIGN KEY (ProductKey) REFERENCES DimProducts(ProductKey)
ALTER TABLE DWNorthwindLite.dbo.FactOrders
ADD CONSTRAINT FK_FactOrders_DimCustomers
FOREIGN KEY (CustomerKey) REFERENCES DimCustomers(CustomerKey)
ALTER TABLE DWNorthwindLite.dbo.FactOrders
ADD CONSTRAINT FK_FactOrders_DimDates
FOREIGN KEY (OrderDateKey) REFERENCES DimDates(DateKey)
--********************************************************************--
-- Review the results of this script
--********************************************************************--
Select * from [dbo].[DimProducts]
Select * from [dbo].[DimCustomers]
Select * from [dbo].[DimDates]
Select * from [dbo].[FactOrders]
This concludes the ETL process using SQL scripting for a data Warehouse solution.