Open anirudhpillai opened 3 months ago
Found a few issues when a funnel query is used to get user paths.
Taking this funnel as the starting point.
When you click on the second step and go to 'show user paths after drop off', the sql generated by the query runner has a few errors that I've highlighted by --FIX
.
SELECT
last_path_key AS source_event,
path_key AS target_event,
count(*) AS event_count,
avg(conversion_time) AS average_conversion_time
FROM
(SELECT
person_id AS person_id,
path AS path,
conversion_time AS conversion_time,
event_in_session_index AS event_in_session_index,
concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key,
if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key,
path_dropoff_key AS path_dropoff_key
FROM
(SELECT
person_id AS person_id,
joined_path_tuple.1 AS path,
joined_path_tuple.2 AS conversion_time,
joined_path_tuple.3 AS prev_path,
event_in_session_index,
session_index AS session_index,
arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0,
arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping,
arrayFilter((x, y) -> y, time, mapping) AS timings,
arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path,
-- FIX 3
-- indexOf(compact_path, NULL) AS target_index,
-- target_index results in the 'source_event' and we want
-- source_event to equal the step after which we measure drop off
indexOf(compact_path, 'https://posthog.com/pricing') AS target_index,
if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path,
arraySlice(filtered_path, 1, 2) AS limited_path,
if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings,
arraySlice(filtered_timings, 1, 2) AS limited_timings,
arrayDifference(limited_timings) AS timings_diff,
concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key,
arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) AS limited_path_timings
FROM
(SELECT
person_id AS person_id,
path_time_tuple.1 AS path_basic,
path_time_tuple.2 AS time,
session_index,
arrayZip(path_list, timing_list, arrayDifference(timing_list)) AS paths_tuple,
arraySplit(x -> if(ifNull(less(plus(parseDateTime64BestEffortOrNull('2018-01-01', 6, 'US/Pacific'), toIntervalSecond(toInt64(x.3))), plus(parseDateTime64BestEffortOrNull('2018-01-01', 6, 'US/Pacific'), toIntervalDay(14))), 0), 0, 1), paths_tuple) AS session_paths
FROM
(SELECT
person_id AS person_id,
groupArray(timestamp) AS timing_list,
groupArray(path_item) AS path_list
FROM
(SELECT
toTimeZone(events.timestamp, 'US/Pacific') AS timestamp,
if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id) AS person_id,
funnel_actors.timestamp AS target_timestamp,
ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(nullIf(nullIf(events.`mat_$current_url`, ''), 'null'), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped,
NULL AS groupings,
multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index,
(if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item
FROM
events
LEFT OUTER JOIN (SELECT
argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id,
person_distinct_id_overrides.distinct_id AS distinct_id
FROM
person_distinct_id_overrides
WHERE
equals(person_distinct_id_overrides.team_id, 2)
GROUP BY
person_distinct_id_overrides.distinct_id
HAVING
ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)
SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id)
INNER JOIN (SELECT
aggregation_target AS actor_id,
timestamp AS timestamp
FROM
(SELECT
aggregation_target AS aggregation_target,
steps AS steps,
argMax(latest_0, steps) AS timestamp,
argMax(latest_2, steps) AS final_timestamp,
argMax(latest_0, steps) AS first_timestamp,
avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
avg(step_2_conversion_time) AS step_2_average_conversion_time_inner,
median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
median(step_2_conversion_time) AS step_2_median_conversion_time_inner
FROM
(SELECT
aggregation_target AS aggregation_target,
steps AS steps,
max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
step_1_conversion_time AS step_1_conversion_time,
step_2_conversion_time AS step_2_conversion_time,
latest_0 AS latest_0,
latest_2 AS latest_2,
latest_0 AS latest_0
FROM
(SELECT
aggregation_target AS aggregation_target,
timestamp AS timestamp,
step_0 AS step_0,
latest_0 AS latest_0,
step_1 AS step_1,
latest_1 AS latest_1,
step_2 AS step_2,
latest_2 AS latest_2,
if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps,
if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time
FROM
(SELECT
aggregation_target AS aggregation_target,
timestamp AS timestamp,
step_0 AS step_0,
latest_0 AS latest_0,
step_1 AS step_1,
latest_1 AS latest_1,
step_2 AS step_2,
min(latest_2) OVER (PARTITION BY aggregation_target ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2
FROM
(SELECT
aggregation_target AS aggregation_target,
timestamp AS timestamp,
step_0 AS step_0,
latest_0 AS latest_0,
step_1 AS step_1,
latest_1 AS latest_1,
step_2 AS step_2,
if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2
FROM
(SELECT
aggregation_target AS aggregation_target,
timestamp AS timestamp,
step_0 AS step_0,
latest_0 AS latest_0,
step_1 AS step_1,
min(latest_1) OVER (PARTITION BY aggregation_target ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
step_2 AS step_2,
min(latest_2) OVER (PARTITION BY aggregation_target ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2
FROM
(SELECT
toTimeZone(e.timestamp, 'US/Pacific') AS timestamp,
if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target,
if(and(equals(e.event, '$pageview'), ifNull(equals(nullIf(nullIf(e.`mat_$current_url`, ''), 'null'), 'https://posthog.com/'), 0)), 1, 0) AS step_0,
if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
if(and(equals(e.event, '$pageview'), ifNull(equals(nullIf(nullIf(e.`mat_$current_url`, ''), 'null'), 'https://posthog.com/pricing'), 0)), 1, 0) AS step_1,
if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
if(and(equals(e.event, '$pageview'), ifNull(equals(nullIf(nullIf(e.`mat_$current_url`, ''), 'null'), 'https://us.posthog.com/signup'), 0)), 1, 0) AS step_2,
if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2
FROM
events AS e
LEFT OUTER JOIN (SELECT
argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id,
person_distinct_id_overrides.distinct_id AS distinct_id
FROM
person_distinct_id_overrides
WHERE
equals(person_distinct_id_overrides.team_id, 2)
GROUP BY
person_distinct_id_overrides.distinct_id
HAVING
ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)
SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id)
WHERE
and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2024-07-25 00:00:00.000000', 6, 'US/Pacific')), lessOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2024-08-01 23:59:59.999999', 6, 'US/Pacific'))), in(e.event, tuple('$pageview')), ifNull(notILike(nullIf(nullIf(mat_pp_email, ''), 'null'), '%posthog.com%'), 1), and(ifNull(notEquals(nullIf(nullIf(e.`mat_$host`, ''), 'null'), '127.0.0.1:3000'), 1), ifNull(notEquals(nullIf(nullIf(e.`mat_$host`, ''), 'null'), '127.0.0.1:5000'), 1), ifNull(notEquals(nullIf(nullIf(e.`mat_$host`, ''), 'null'), 'localhost:5000'), 1), ifNull(notEquals(nullIf(nullIf(e.`mat_$host`, ''), 'null'), 'localhost:8000'), 1), ifNull(notEquals(nullIf(nullIf(e.`mat_$host`, ''), 'null'), 'localhost:8001'), 1), ifNull(notEquals(nullIf(nullIf(e.`mat_$host`, ''), 'null'), 'localhost'), 1)), ifNull(notILike(nullIf(nullIf(e.mat_distinct_id, ''), 'null'), '%posthog.com%'), 1), ifNull(notEquals(nullIf(nullIf(mat_pp_email, ''), 'null'), 'capturi@capturi.com'), 1), ifNull(notEquals(nullIf(nullIf(mat_pp_email, ''), 'null'), 'fuziontech@gmail.com'), 1), ifNull(notEquals(nullIf(nullIf(e.mat_realm, ''), 'null'), 'demo'), 1), ifNull(notEquals(nullIf(nullIf(e.`mat_$ip`, ''), 'null'), '71.84.26.175'), 1), ifNull(notEquals(nullIf(nullIf(mat_pp_email, ''), 'null'), 'corywatilo@gmail.com'), 1)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))))
WHERE
ifNull(equals(step_0, 1), 0)))
GROUP BY
aggregation_target,
steps
HAVING
ifNull(equals(steps, max(max_steps)), isNull(steps) and isNull(max(max_steps))))
WHERE
-- FIX 2
-- ifNull(equals(steps, 1), 0)
-- need to update to steps = 2, as we want to measure drop off after
-- second step
-- so here need to filter for users that only did the first 2 steps
-- nothing less/more
ifNull(equals(steps, 2), 0)
ORDER BY
aggregation_target ASC) AS funnel_actors ON equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), funnel_actors.actor_id)
WHERE
and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'US/Pacific'),
-- FIX 1
-- plus(toTimeZone(funnel_actors.timestamp, 'UTC'), toIntervalDay(14))
-- not sure why we're filtering for events after the conversion interval (14 days)
-- that totally drops off all the events we had within the funnel
toTimeZone(funnel_actors.timestamp, 'UTC')
), and(greaterOrEquals(toTimeZone(events.timestamp, 'US/Pacific'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2024-07-25 00:00:00', 6, 'US/Pacific')))), lessOrEquals(toTimeZone(events.timestamp, 'US/Pacific'), assumeNotNull(parseDateTime64BestEffortOrNull('2024-08-01 23:59:59', 6, 'US/Pacific')))), or(equals(events.event, '$pageview'), not(startsWith(events.event, '$'))))
ORDER BY
if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id) ASC,
toTimeZone(events.timestamp, 'US/Pacific') ASC)
GROUP BY
person_id)
ARRAY JOIN session_paths AS path_time_tuple, arrayEnumerate(session_paths) AS session_index)
ARRAY JOIN limited_path_timings AS joined_path_tuple, arrayEnumerate(limited_path_timings) AS event_in_session_index))
WHERE
isNotNull(source_event)
GROUP BY
source_event,
target_event
ORDER BY
event_count DESC,
source_event ASC,
target_event ASC
LIMIT 50 SETTINGS readonly=2, max_execution_time=600, allow_experimental_object_type=1, format_csv_allow_double_quotes=0, max_ast_elements=2000000, max_expanded_ast_elements=2000000, max_query_size=1048576, max_bytes_before_external_group_by=23622320128
It looks like the queries we're sending when we come from paths are different than when you generate it from funnels. Maybe just lacking "funnelFilters" and the "funnelVizType." It seems to cause it to return nothing.
Bug Description
Bug description
On any funnel query when you click the options to view 'User paths after drop off', you don't get any results
How to reproduce
Debug info