Ranking and Filtering - RecapΒΆ
Let us recap the procedure to get top 5 products by revenue for each day.
We have our original data in orders and order_items
We can pre-compute the data or create a view with the logic to generate daily product revenue
Then, we have to use the view or table or even sub query to compute rank
Once the ranks are computed, we need to use sub query to filter based up on our requirement.
Let us come up with the query to compute daily product revenue.
%load_ext sql
The sql extension is already loaded. To reload it, use:
%reload_ext sql
%env DATABASE_URL=postgresql://itversity_retail_user:retail_password@localhost:5432/itversity_retail_db
env: DATABASE_URL=postgresql://itversity_retail_user:retail_password@localhost:5432/itversity_retail_db
%%sql
SELECT o.order_date,
oi.order_item_product_id,
round(sum(oi.order_item_subtotal)::numeric, 2) AS revenue
FROM orders o JOIN order_items oi
ON o.order_id = oi.order_item_order_id
WHERE o.order_status IN ('COMPLETE', 'CLOSED')
GROUP BY o.order_date, oi.order_item_product_id
ORDER BY o.order_date, revenue DESC
LIMIT 30
* postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
30 rows affected.
order_date | order_item_product_id | revenue |
---|---|---|
2013-07-25 00:00:00 | 1004 | 5599.72 |
2013-07-25 00:00:00 | 191 | 5099.49 |
2013-07-25 00:00:00 | 957 | 4499.70 |
2013-07-25 00:00:00 | 365 | 3359.44 |
2013-07-25 00:00:00 | 1073 | 2999.85 |
2013-07-25 00:00:00 | 1014 | 2798.88 |
2013-07-25 00:00:00 | 403 | 1949.85 |
2013-07-25 00:00:00 | 502 | 1650.00 |
2013-07-25 00:00:00 | 627 | 1079.73 |
2013-07-25 00:00:00 | 226 | 599.99 |
2013-07-25 00:00:00 | 24 | 319.96 |
2013-07-25 00:00:00 | 821 | 207.96 |
2013-07-25 00:00:00 | 625 | 199.99 |
2013-07-25 00:00:00 | 705 | 119.99 |
2013-07-25 00:00:00 | 572 | 119.97 |
2013-07-25 00:00:00 | 666 | 109.99 |
2013-07-25 00:00:00 | 725 | 108.00 |
2013-07-25 00:00:00 | 134 | 100.00 |
2013-07-25 00:00:00 | 906 | 99.96 |
2013-07-25 00:00:00 | 828 | 95.97 |
2013-07-25 00:00:00 | 810 | 79.96 |
2013-07-25 00:00:00 | 924 | 79.95 |
2013-07-25 00:00:00 | 926 | 79.95 |
2013-07-25 00:00:00 | 93 | 74.97 |
2013-07-25 00:00:00 | 835 | 63.98 |
2013-07-25 00:00:00 | 897 | 49.98 |
2013-07-26 00:00:00 | 1004 | 10799.46 |
2013-07-26 00:00:00 | 365 | 7978.67 |
2013-07-26 00:00:00 | 957 | 6899.54 |
2013-07-26 00:00:00 | 191 | 6799.32 |
Let us compute the rank for each product with in each date using revenue as criteria.
%%sql
SELECT nq.*,
dense_rank() OVER (
PARTITION BY order_date
ORDER BY revenue DESC
) AS drnk
FROM (
SELECT o.order_date,
oi.order_item_product_id,
round(sum(oi.order_item_subtotal)::numeric, 2) AS revenue
FROM orders o
JOIN order_items oi
ON o.order_id = oi.order_item_order_id
WHERE o.order_status IN ('COMPLETE', 'CLOSED')
GROUP BY o.order_date, oi.order_item_product_id
) nq
ORDER BY order_date, revenue DESC
LIMIT 30
* postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
30 rows affected.
order_date | order_item_product_id | revenue | drnk |
---|---|---|---|
2013-07-25 00:00:00 | 1004 | 5599.72 | 1 |
2013-07-25 00:00:00 | 191 | 5099.49 | 2 |
2013-07-25 00:00:00 | 957 | 4499.70 | 3 |
2013-07-25 00:00:00 | 365 | 3359.44 | 4 |
2013-07-25 00:00:00 | 1073 | 2999.85 | 5 |
2013-07-25 00:00:00 | 1014 | 2798.88 | 6 |
2013-07-25 00:00:00 | 403 | 1949.85 | 7 |
2013-07-25 00:00:00 | 502 | 1650.00 | 8 |
2013-07-25 00:00:00 | 627 | 1079.73 | 9 |
2013-07-25 00:00:00 | 226 | 599.99 | 10 |
2013-07-25 00:00:00 | 24 | 319.96 | 11 |
2013-07-25 00:00:00 | 821 | 207.96 | 12 |
2013-07-25 00:00:00 | 625 | 199.99 | 13 |
2013-07-25 00:00:00 | 705 | 119.99 | 14 |
2013-07-25 00:00:00 | 572 | 119.97 | 15 |
2013-07-25 00:00:00 | 666 | 109.99 | 16 |
2013-07-25 00:00:00 | 725 | 108.00 | 17 |
2013-07-25 00:00:00 | 134 | 100.00 | 18 |
2013-07-25 00:00:00 | 906 | 99.96 | 19 |
2013-07-25 00:00:00 | 828 | 95.97 | 20 |
2013-07-25 00:00:00 | 810 | 79.96 | 21 |
2013-07-25 00:00:00 | 924 | 79.95 | 22 |
2013-07-25 00:00:00 | 926 | 79.95 | 22 |
2013-07-25 00:00:00 | 93 | 74.97 | 23 |
2013-07-25 00:00:00 | 835 | 63.98 | 24 |
2013-07-25 00:00:00 | 897 | 49.98 | 25 |
2013-07-26 00:00:00 | 1004 | 10799.46 | 1 |
2013-07-26 00:00:00 | 365 | 7978.67 | 2 |
2013-07-26 00:00:00 | 957 | 6899.54 | 3 |
2013-07-26 00:00:00 | 191 | 6799.32 | 4 |
Now let us see how we can filter the data.
%%sql
SELECT * FROM (
SELECT nq.*,
dense_rank() OVER (
PARTITION BY order_date
ORDER BY revenue DESC
) AS drnk
FROM (
SELECT o.order_date,
oi.order_item_product_id,
round(sum(oi.order_item_subtotal)::numeric, 2) AS revenue
FROM orders o
JOIN order_items oi
ON o.order_id = oi.order_item_order_id
WHERE o.order_status IN ('COMPLETE', 'CLOSED')
GROUP BY o.order_date, oi.order_item_product_id
) nq
) nq1
WHERE drnk <= 5
ORDER BY order_date, revenue DESC
LIMIT 20
* postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
20 rows affected.
order_date | order_item_product_id | revenue | drnk |
---|---|---|---|
2013-07-25 00:00:00 | 1004 | 5599.72 | 1 |
2013-07-25 00:00:00 | 191 | 5099.49 | 2 |
2013-07-25 00:00:00 | 957 | 4499.70 | 3 |
2013-07-25 00:00:00 | 365 | 3359.44 | 4 |
2013-07-25 00:00:00 | 1073 | 2999.85 | 5 |
2013-07-26 00:00:00 | 1004 | 10799.46 | 1 |
2013-07-26 00:00:00 | 365 | 7978.67 | 2 |
2013-07-26 00:00:00 | 957 | 6899.54 | 3 |
2013-07-26 00:00:00 | 191 | 6799.32 | 4 |
2013-07-26 00:00:00 | 1014 | 4798.08 | 5 |
2013-07-27 00:00:00 | 1004 | 9599.52 | 1 |
2013-07-27 00:00:00 | 191 | 5999.40 | 2 |
2013-07-27 00:00:00 | 957 | 5699.62 | 3 |
2013-07-27 00:00:00 | 1073 | 5399.73 | 4 |
2013-07-27 00:00:00 | 365 | 5099.15 | 5 |
2013-07-28 00:00:00 | 1004 | 5599.72 | 1 |
2013-07-28 00:00:00 | 957 | 5099.66 | 2 |
2013-07-28 00:00:00 | 365 | 4799.20 | 3 |
2013-07-28 00:00:00 | 403 | 4419.66 | 4 |
2013-07-28 00:00:00 | 191 | 4299.57 | 5 |
%%sql
SELECT * FROM (SELECT dpr.*,
dense_rank() OVER (
PARTITION BY order_date
ORDER BY revenue DESC
) AS drnk
FROM daily_product_revenue AS dpr) q
WHERE drnk <= 5
ORDER BY order_date, revenue DESC
LIMIT 20
* postgresql://itversity_retail_user:***@localhost:5432/itversity_retail_db
20 rows affected.
order_date | order_item_product_id | revenue | drnk |
---|---|---|---|
2013-07-25 00:00:00 | 1004 | 5599.72 | 1 |
2013-07-25 00:00:00 | 191 | 5099.49 | 2 |
2013-07-25 00:00:00 | 957 | 4499.70 | 3 |
2013-07-25 00:00:00 | 365 | 3359.44 | 4 |
2013-07-25 00:00:00 | 1073 | 2999.85 | 5 |
2013-07-26 00:00:00 | 1004 | 10799.46 | 1 |
2013-07-26 00:00:00 | 365 | 7978.67 | 2 |
2013-07-26 00:00:00 | 957 | 6899.54 | 3 |
2013-07-26 00:00:00 | 191 | 6799.32 | 4 |
2013-07-26 00:00:00 | 1014 | 4798.08 | 5 |
2013-07-27 00:00:00 | 1004 | 9599.52 | 1 |
2013-07-27 00:00:00 | 191 | 5999.40 | 2 |
2013-07-27 00:00:00 | 957 | 5699.62 | 3 |
2013-07-27 00:00:00 | 1073 | 5399.73 | 4 |
2013-07-27 00:00:00 | 365 | 5099.15 | 5 |
2013-07-28 00:00:00 | 1004 | 5599.72 | 1 |
2013-07-28 00:00:00 | 957 | 5099.66 | 2 |
2013-07-28 00:00:00 | 365 | 4799.20 | 3 |
2013-07-28 00:00:00 | 403 | 4419.66 | 4 |
2013-07-28 00:00:00 | 191 | 4299.57 | 5 |