Closed alberttwong closed 7 months ago
question 2 with JOINS
with tmp1 as (
with tmp as (
select
ItemID,
t.level as level,
count(UserID) as res
from
(
select
ItemID,
UserID,
window_funnel(
1800,
timestamp,
0,
[BehaviorType = 'pv',
BehaviorType ='buy' ]
) as level
from
user_behavior
where timestamp >= '2017-12-02 00:00:00'
and timestamp <= '2017-12-02 23:59:59'
group by
ItemID,
UserID
) as t
where
t.level > 0
group by
t.ItemID,
t.level
)
select
tmp.ItemID,
tmp.level,
sum(tmp.res) over (
partition by tmp.ItemID
order by
tmp.level rows between current row
and unbounded following
) as retention
from
tmp
)
select
tmp1.ItemID,
i.name,
tmp1.level,
tmp1.retention / last_value(tmp1.retention) over(
partition by tmp1.ItemID
order by
tmp1.level desc rows between current row
and 1 following
) as retention_ratio
from
tmp1
JOIN item i ON tmp1.ItemID = i.ItemID
order by
tmp1.level desc,
retention_ratio
limit
10;
output
+---------+--------------+-------+-----------------------+
| ItemID | name | level | retention_ratio |
+---------+--------------+-------+-----------------------+
| 59883 | item 59883 | 2 | 0.0003616636528028933 |
| 394978 | item 394978 | 2 | 0.0006357279084551812 |
| 1164931 | item 1164931 | 2 | 0.0006648936170212766 |
| 4622270 | item 4622270 | 2 | 0.0007692307692307692 |
| 812879 | item 812879 | 2 | 0.0009121313469139556 |
| 1783990 | item 1783990 | 2 | 0.0009132420091324201 |
| 3847054 | item 3847054 | 2 | 0.000925925925925926 |
| 2742138 | item 2742138 | 2 | 0.0009881422924901185 |
| 530918 | item 530918 | 2 | 0.0010193679918450561 |
| 600756 | item 600756 | 2 | 0.0010319917440660474 |
+---------+--------------+-------+-----------------------+
10 rows in set (5.07 sec)
run in the spark container.
run in the scala prompt
run in the scala prompt
run in the mysql client
optional to run in the mysql prompt
3 sql exercises at https://forum.starrocks.io/t/retail-ecommerce-funnel-analysis-demo-with-1-million-members-and-87-million-record-dataset-using-starrocks/269