Dimension tables – dim.products
In this video we encounter another data quality issue when we build our first dimension table. Some productids have more than one product description.
Resources
Here’s the SQL to create the dim.products table (excluding productids with more than one product description):
CREATE TABLE superstore.dim.products(
productid VARCHAR(50) PRIMARY KEY
,category VARCHAR(50)
,subcategory VARCHAR(50)
,productname VARCHAR(150)
)
;
INSERT INTO superstore.dim.products
WITH dupes AS(
SELECT productid
,category
,subcategory
,productname
,CASE
WHEN LEAD(productid,1) OVER(ORDER BY productid) = productid
THEN 1
WHEN LAG(productid,1) OVER(ORDER BY productid) = productid
THEN 1
ELSE 0
END AS isdupe
FROM superstore.staging.orderitems
GROUP BY productid,category,subcategory,productname
ORDER BY productid
)
SELECT productid
,category
,subcategory
,productname
FROM dupes
WHERE isdupe = 0
ORDER BY productid
;
SELECT *
FROM superstore.dim.products;
And here’s a very similar query to create a table of the problem products with more than one description (to be resolved)
CREATE TABLE superstore.staging.problemproducts(
productid VARCHAR(50)
,category VARCHAR(50)
,subcategory VARCHAR(50)
,productname VARCHAR(150)
)
;
INSERT INTO superstore.staging.problemproducts
WITH dupes AS(
SELECT productid
,category
,subcategory
,productname
,CASE
WHEN LEAD(productid,1) OVER(ORDER BY productid) = productid
THEN 1
WHEN LAG(productid,1) OVER(ORDER BY productid) = productid
THEN 1
ELSE 0
END AS isdupe
FROM superstore.staging.orderitems
GROUP BY productid,category,subcategory,productname
ORDER BY productid
)
SELECT productid
,category
,subcategory
,productname
FROM dupes
WHERE isdupe = 1
ORDER BY productid
;
SELECT *
FROM superstore.staging.problemproducts;
