Dimension tables – dim.products

In this video we encounter another data quality issue when we build our first dimension table. Some productids have more than one product description.

Resources

Here’s the SQL to create the dim.products table (excluding productids with more than one product description):


CREATE TABLE 	superstore.dim.products(
	productid VARCHAR(50) PRIMARY KEY
	,category VARCHAR(50)
	,subcategory VARCHAR(50)
	,productname VARCHAR(150)
)
;

INSERT INTO 	superstore.dim.products

WITH 		dupes AS(
SELECT			productid
				,category
				,subcategory
				,productname
				,CASE 
					WHEN	LEAD(productid,1) OVER(ORDER BY productid) = productid
					THEN	1
					WHEN	LAG(productid,1) OVER(ORDER BY productid) = productid
					THEN	1
					ELSE	0
				END AS isdupe
FROM			superstore.staging.orderitems
GROUP BY		productid,category,subcategory,productname
ORDER BY		productid
)
SELECT 		productid
			,category
			,subcategory
			,productname
FROM		dupes
WHERE		isdupe = 0
ORDER BY	productid
;

SELECT  	*
FROM 		superstore.dim.products;

And here’s a very similar query to create a table of the problem products with more than one description (to be resolved)


CREATE TABLE 	superstore.staging.problemproducts(
	productid VARCHAR(50) 
	,category VARCHAR(50)
	,subcategory VARCHAR(50)
	,productname VARCHAR(150)
)
;

INSERT INTO 	superstore.staging.problemproducts

WITH 		dupes AS(
SELECT			productid
				,category
				,subcategory
				,productname
				,CASE 
					WHEN	LEAD(productid,1) OVER(ORDER BY productid) = productid
					THEN	1
					WHEN	LAG(productid,1) OVER(ORDER BY productid) = productid
					THEN	1
					ELSE	0
				END AS isdupe
FROM			superstore.staging.orderitems
GROUP BY		productid,category,subcategory,productname
ORDER BY		productid
)
SELECT 		productid
			,category
			,subcategory
			,productname
FROM		dupes
WHERE		isdupe = 1
ORDER BY	productid
;

SELECT  	*
FROM 		superstore.staging.problemproducts;