Closed alberttwong closed 5 months ago
https://forum.starrocks.io/t/retail-ecommerce-funnel-analysis-demo-with-1-million-members-and-87-million-record-dataset-using-starrocks/269
VS
mysql> select count(*) from user_behavior; +----------+ | count(*) | +----------+ | 86953525 | +----------+ 1 row in set (0.14 sec) mysql> with tmp1 as ( -> with tmp as ( -> select -> t.level as level, -> count(UserID) as res -> from -> ( -> select -> UserID, -> window_funnel( -> 18000, -> `Timestamp`, -> 0, -> [BehaviorType = 'pv' , -> BehaviorType = 'cart', -> BehaviorType = 'buy' ] -> ) as level -> from -> user_behavior -> where `Timestamp` >= '2017-12-02 00:00:00' -> and `Timestamp` <= '2017-12-02 23:59:59' -> group by -> UserID -> ) as t -> where -> t.level > 0 -> group by -> t.level -> ) -> select -> tmp.level, -> sum(tmp.res) over ( order by tmp.level rows between current row and unbounded following ) as retention from tmp ) select tmp1.level, tmp1.retention, last_value(tmp1.retention) over( order by tmp1.level rows between current row and 1 following )/ tmp1.retention as retention_ratio from tmp1; -> order by -> tmp.level rows between current row -> and unbounded following -> ) as retention -> from -> tmp -> ) -> select -> tmp1.level, -> tmp1.retention, -> last_value(tmp1.retention) over( -> order by -> tmp1.level rows between current row -> and 1 following -> )/ tmp1.retention as retention_ratio -> from -> tmp1; +-------+-----------+---------------------+ | level | retention | retention_ratio | +-------+-----------+---------------------+ | 1 | 913314 | 0.34725078122091635 | | 2 | 317149 | 0.23266981765668504 | | 3 | 73791 | 1 | +-------+-----------+---------------------+ 3 rows in set (1.94 sec) mysql> with tmp1 as ( -> with tmp as ( -> select -> ItemID, -> t.level as level, -> count(UserID) as res -> from -> ( -> select -> ItemID, -> UserID, -> window_funnel( -> 1800, -> timestamp, -> 0, -> [BehaviorType = 'pv', -> BehaviorType ='buy' ] -> ) as level -> from -> user_behavior -> where timestamp >= '2017-12-02 00:00:00' -> and timestamp <= '2017-12-02 23:59:59' -> group by -> ItemID, -> UserID -> ) as t -> where -> t.level > 0 -> group by -> t.ItemID, -> t.level -> ) -> select -> tmp.ItemID, -> tmp.level, -> sum(tmp.res) over ( -> partition by tmp.ItemID -> order by -> tmp.level rows between current row -> and unbounded following -> ) as retention -> from -> tmp -> ) -> select -> tmp1.ItemID, -> tmp1.level, -> tmp1.retention / last_value(tmp1.retention) over( -> partition by tmp1.ItemID -> order by -> tmp1.level desc rows between current row -> and 1 following -> ) as retention_ratio -> from -> tmp1 -> order by -> level desc, -> retention_ratio -> limit -> 10; +---------+-------+-----------------------+ | ItemID | level | retention_ratio | +---------+-------+-----------------------+ | 59883 | 2 | 0.0003616636528028933 | | 394978 | 2 | 0.0006357279084551812 | | 1164931 | 2 | 0.0006648936170212766 | | 4622270 | 2 | 0.0007692307692307692 | | 812879 | 2 | 0.0009121313469139556 | | 1783990 | 2 | 0.0009132420091324201 | | 3847054 | 2 | 0.000925925925925926 | | 2742138 | 2 | 0.0009881422924901185 | | 530918 | 2 | 0.0010193679918450561 | | 600756 | 2 | 0.0010319917440660474 | +---------+-------+-----------------------+ 10 rows in set (3.67 sec) mysql> select -> log.BehaviorType, -> count(log.BehaviorType) -> from -> ( -> select -> ItemID, -> UserID, -> window_funnel( -> 1800, -> timestamp, -> 0, -> [BehaviorType = 'pv' , -> BehaviorType = 'buy' ] -> ) as level -> from -> user_behavior -> where timestamp >= '2017-12-02 00:00:00' -> and timestamp <= '2017-12-02 23:59:59' -> group by -> ItemID, -> UserID -> ) as list -> left join ( -> select -> UserID, -> array_agg(BehaviorType) as BehaviorType -> from -> user_behavior -> where -> ItemID = 3563468 -> and timestamp >= '2017-12-02 00:00:00' -> and timestamp <= '2017-12-02 23:59:59' -> group by -> UserID -> ) as log on list.UserID = log.UserID -> where -> list.ItemID = 3563468 -> and list.level = 1 -> group by -> log.BehaviorType -> order by -> count(BehaviorType) desc; +--------------------------------------+-------------------------+ | BehaviorType | count(log.BehaviorType) | +--------------------------------------+-------------------------+ | ["pv"] | 1589 | | ["pv","pv"] | 52 | | ["pv","pv","pv"] | 10 | | ["cart","pv"] | 8 | | ["cart","pv","pv"] | 6 | | ["fav","pv"] | 6 | | ["fav","pv","pv"] | 3 | | ["pv","pv","pv","pv"] | 2 | | ["cart","pv","pv","pv"] | 2 | | ["cart","pv","pv","pv","pv"] | 1 | | ["pv","pv","pv","pv","pv","pv","pv"] | 1 | | ["fav","pv","pv","pv","pv"] | 1 | | ["fav","pv","pv","pv","cart"] | 1 | | ["pv","cart","pv"] | 1 | | ["pv","pv","cart"] | 1 | +--------------------------------------+-------------------------+ 15 rows in set (3.10 sec) mysql>
I cannot duplicate the results in https://github.com/StarRocks/starrocks/discussions/26075
https://forum.starrocks.io/t/retail-ecommerce-funnel-analysis-demo-with-1-million-members-and-87-million-record-dataset-using-starrocks/269
VS