mozilla / bigquery-etl

Bigquery ETL
https://mozilla.github.io/bigquery-etl
Mozilla Public License 2.0
246 stars 98 forks source link

[DENG-3905] Support queries with temp udfs when billing project is set #5668

Closed BenWu closed 2 months ago

BenWu commented 2 months ago

fixes DENG-3905

temp udfs turn a query into a script when running in a session (i.e. --billing-project is set) and scripts can't have a destination table. This creates the temp udfs in a separate query.

Also reverts the workaround in #5666 for readability.

Checklist for reviewer:

For modifications to schemas in restricted namespaces (see CODEOWNERS):

┆Issue is synchronized with this Jira Task

dataops-ci-bot commented 2 months ago

Integration report for "Revert "DENG-3889 Replace events_stream temp udfs with inline usages (#5666)""

sql.diff

Click to expand! ```diff diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_backend_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_backend_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_backend_derived/events_stream_v1/query.sql 2024-05-28 18:24:31.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_backend_derived/events_stream_v1/query.sql 2024-05-28 18:26:29.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `accounts_backend.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/events_stream_v1/query.sql 2024-05-28 18:24:31.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/events_stream_v1/query.sql 2024-05-28 18:26:30.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `accounts_cirrus.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_frontend_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_frontend_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_frontend_derived/events_stream_v1/query.sql 2024-05-28 18:24:32.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_frontend_derived/events_stream_v1/query.sql 2024-05-28 18:26:30.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `accounts_frontend.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/event_monitoring_live_v1/materialized_view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/event_monitoring_live_v1/materialized_view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-28 18:24:32.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-28 18:26:28.000000000 +0000 @@ -50,7 +50,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.bedrock_live.events_v1` + `moz-fx-data-shared-prod.bedrock_live.non_interaction_v1` UNION ALL SELECT submission_timestamp, @@ -70,7 +70,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.bedrock_live.non_interaction_v1` + `moz-fx-data-shared-prod.bedrock_live.events_v1` ) CROSS JOIN UNNEST(events) AS event, diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/events_stream_v1/query.sql 2024-05-28 18:24:32.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/events_stream_v1/query.sql 2024-05-28 18:26:31.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `bedrock.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/events_stream_v1/query.sql 2024-05-28 18:24:32.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/events_stream_v1/query.sql 2024-05-28 18:26:30.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `burnham.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml 2024-05-28 18:23:52.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml 2024-05-28 18:32:56.000000000 +0000 @@ -1,49 +1,49 @@ fields: -- mode: NULLABLE - name: submission_date +- name: submission_date type: DATE -- mode: NULLABLE - name: source + mode: NULLABLE +- name: source type: STRING -- mode: NULLABLE - name: event_type + mode: NULLABLE +- name: event_type type: STRING -- mode: NULLABLE - name: form_factor + mode: NULLABLE +- name: form_factor type: STRING -- mode: NULLABLE - name: country + mode: NULLABLE +- name: country type: STRING -- mode: NULLABLE - name: subdivision1 + mode: NULLABLE +- name: subdivision1 type: STRING -- mode: NULLABLE - name: advertiser + mode: NULLABLE +- name: advertiser type: STRING -- mode: NULLABLE - name: release_channel + mode: NULLABLE +- name: release_channel type: STRING -- mode: NULLABLE - name: position + mode: NULLABLE +- name: position type: INTEGER -- mode: NULLABLE - name: provider + mode: NULLABLE +- name: provider type: STRING -- mode: NULLABLE - name: match_type + mode: NULLABLE +- name: match_type type: STRING -- mode: NULLABLE - name: normalized_os + mode: NULLABLE +- name: normalized_os type: STRING -- mode: NULLABLE - name: suggest_data_sharing_enabled + mode: NULLABLE +- name: suggest_data_sharing_enabled type: BOOLEAN -- mode: NULLABLE - name: event_count + mode: NULLABLE +- name: event_count type: INTEGER -- mode: NULLABLE - name: user_count + mode: NULLABLE +- name: user_count type: INTEGER -- mode: NULLABLE - name: query_type + mode: NULLABLE +- name: query_type type: STRING + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml 2024-05-28 18:23:52.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml 2024-05-28 18:32:55.000000000 +0000 @@ -1,40 +1,40 @@ fields: -- mode: NULLABLE - name: submission_date +- name: submission_date type: DATE -- mode: NULLABLE - name: form_factor + mode: NULLABLE +- name: form_factor type: STRING -- mode: NULLABLE - name: country + mode: NULLABLE +- name: country type: STRING -- mode: NULLABLE - name: advertiser + mode: NULLABLE +- name: advertiser type: STRING -- mode: NULLABLE - name: normalized_os + mode: NULLABLE +- name: normalized_os type: STRING -- mode: NULLABLE - name: release_channel + mode: NULLABLE +- name: release_channel type: STRING -- mode: NULLABLE - name: position + mode: NULLABLE +- name: position type: INTEGER -- mode: NULLABLE - name: provider + mode: NULLABLE +- name: provider type: STRING -- mode: NULLABLE - name: match_type + mode: NULLABLE +- name: match_type type: STRING -- mode: NULLABLE - name: suggest_data_sharing_enabled + mode: NULLABLE +- name: suggest_data_sharing_enabled type: BOOLEAN -- mode: NULLABLE - name: impression_count + mode: NULLABLE +- name: impression_count type: INTEGER -- mode: NULLABLE - name: click_count + mode: NULLABLE +- name: click_count type: INTEGER -- mode: NULLABLE - name: query_type + mode: NULLABLE +- name: query_type type: STRING + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/debug_ping_view_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/debug_ping_view_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/debug_ping_view_derived/events_stream_v1/query.sql 2024-05-28 18:24:31.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/debug_ping_view_derived/events_stream_v1/query.sql 2024-05-28 18:26:31.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `debug_ping_view.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml 2024-05-28 18:23:52.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml 2024-05-28 18:31:38.000000000 +0000 @@ -26,6 +26,9 @@ - name: adjust_network type: STRING mode: NULLABLE +- name: install_source + type: STRING + mode: NULLABLE - name: retained_week_2 type: BOOLEAN mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml 2024-05-28 18:23:52.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml 2024-05-28 18:31:37.000000000 +0000 @@ -48,6 +48,10 @@ description: 'The type of source of a client installation. ' +- name: install_source + type: STRING + mode: NULLABLE + description: null - name: new_profiles type: INTEGER mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/events_stream_v1/query.sql 2024-05-28 18:24:32.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/events_stream_v1/query.sql 2024-05-28 18:26:31.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `firefox_desktop_background_defaultagent.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/event_monitoring_live_v1/materialized_view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/event_monitoring_live_v1/materialized_view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-28 18:24:32.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-28 18:26:29.000000000 +0000 @@ -50,7 +50,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_background_tasks_live.events_v1` + `moz-fx-data-shared-prod.firefox_desktop_background_tasks_live.background_tasks_v1` UNION ALL SELECT submission_timestamp, @@ -60,7 +60,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_background_tasks_live.background_tasks_v1` + `moz-fx-data-shared-prod.firefox_desktop_background_tasks_live.events_v1` ) CROSS JOIN UNNEST(events) AS event, diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/events_stream_v1/query.sql 2024-05-28 18:24:32.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/events_stream_v1/query.sql 2024-05-28 18:26:32.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `firefox_desktop_background_tasks.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/events_stream_v1/query.sql 2024-05-28 18:24:32.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/events_stream_v1/query.sql 2024-05-28 18:26:33.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `firefox_desktop_background_update.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/event_monitoring_live_v1/materialized_view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/event_monitoring_live_v1/materialized_view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-28 18:24:32.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-28 18:26:29.000000000 +0000 @@ -50,7 +50,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_live.newtab_v1` + `moz-fx-data-shared-prod.firefox_desktop_live.prototype_no_code_events_v1` UNION ALL SELECT submission_timestamp, @@ -60,7 +60,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_live.events_v1` + `moz-fx-data-shared-prod.firefox_desktop_live.urlbar_potential_exposure_v1` UNION ALL SELECT submission_timestamp, @@ -70,7 +70,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_live.urlbar_potential_exposure_v1` + `moz-fx-data-shared-prod.firefox_desktop_live.newtab_v1` UNION ALL SELECT submission_timestamp, @@ -80,7 +80,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_live.prototype_no_code_events_v1` + `moz-fx-data-shared-prod.firefox_desktop_live.events_v1` ) CROSS JOIN UNNEST(events) AS event, diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/events_stream_v1/query.sql 2024-05-28 18:24:32.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/events_stream_v1/query.sql 2024-05-28 18:26:33.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_inf ```

⚠️ Only part of the diff is displayed.

Link to full diff

dataops-ci-bot commented 2 months ago

Integration report for "newline after comment"

sql.diff

Click to expand! ```diff diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_backend_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_backend_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_backend_derived/events_stream_v1/query.sql 2024-05-28 18:45:31.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_backend_derived/events_stream_v1/query.sql 2024-05-28 18:47:25.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `accounts_backend.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/events_stream_v1/query.sql 2024-05-28 18:45:31.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/events_stream_v1/query.sql 2024-05-28 18:47:26.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `accounts_cirrus.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_frontend_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_frontend_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_frontend_derived/events_stream_v1/query.sql 2024-05-28 18:45:30.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_frontend_derived/events_stream_v1/query.sql 2024-05-28 18:47:26.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `accounts_frontend.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/event_monitoring_live_v1/materialized_view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/event_monitoring_live_v1/materialized_view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-28 18:45:30.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-28 18:47:24.000000000 +0000 @@ -50,7 +50,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.bedrock_live.events_v1` + `moz-fx-data-shared-prod.bedrock_live.non_interaction_v1` UNION ALL SELECT submission_timestamp, @@ -60,7 +60,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.bedrock_live.interaction_v1` + `moz-fx-data-shared-prod.bedrock_live.events_v1` UNION ALL SELECT submission_timestamp, @@ -70,7 +70,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.bedrock_live.non_interaction_v1` + `moz-fx-data-shared-prod.bedrock_live.interaction_v1` ) CROSS JOIN UNNEST(events) AS event, diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/events_stream_v1/query.sql 2024-05-28 18:45:30.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/events_stream_v1/query.sql 2024-05-28 18:47:27.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `bedrock.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/events_stream_v1/query.sql 2024-05-28 18:45:30.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/events_stream_v1/query.sql 2024-05-28 18:47:26.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `burnham.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml 2024-05-28 18:44:56.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml 2024-05-28 18:54:10.000000000 +0000 @@ -1,49 +1,49 @@ fields: -- mode: NULLABLE - name: submission_date +- name: submission_date type: DATE -- mode: NULLABLE - name: source + mode: NULLABLE +- name: source type: STRING -- mode: NULLABLE - name: event_type + mode: NULLABLE +- name: event_type type: STRING -- mode: NULLABLE - name: form_factor + mode: NULLABLE +- name: form_factor type: STRING -- mode: NULLABLE - name: country + mode: NULLABLE +- name: country type: STRING -- mode: NULLABLE - name: subdivision1 + mode: NULLABLE +- name: subdivision1 type: STRING -- mode: NULLABLE - name: advertiser + mode: NULLABLE +- name: advertiser type: STRING -- mode: NULLABLE - name: release_channel + mode: NULLABLE +- name: release_channel type: STRING -- mode: NULLABLE - name: position + mode: NULLABLE +- name: position type: INTEGER -- mode: NULLABLE - name: provider + mode: NULLABLE +- name: provider type: STRING -- mode: NULLABLE - name: match_type + mode: NULLABLE +- name: match_type type: STRING -- mode: NULLABLE - name: normalized_os + mode: NULLABLE +- name: normalized_os type: STRING -- mode: NULLABLE - name: suggest_data_sharing_enabled + mode: NULLABLE +- name: suggest_data_sharing_enabled type: BOOLEAN -- mode: NULLABLE - name: event_count + mode: NULLABLE +- name: event_count type: INTEGER -- mode: NULLABLE - name: user_count + mode: NULLABLE +- name: user_count type: INTEGER -- mode: NULLABLE - name: query_type + mode: NULLABLE +- name: query_type type: STRING + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml 2024-05-28 18:44:56.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml 2024-05-28 18:54:10.000000000 +0000 @@ -1,40 +1,40 @@ fields: -- mode: NULLABLE - name: submission_date +- name: submission_date type: DATE -- mode: NULLABLE - name: form_factor + mode: NULLABLE +- name: form_factor type: STRING -- mode: NULLABLE - name: country + mode: NULLABLE +- name: country type: STRING -- mode: NULLABLE - name: advertiser + mode: NULLABLE +- name: advertiser type: STRING -- mode: NULLABLE - name: normalized_os + mode: NULLABLE +- name: normalized_os type: STRING -- mode: NULLABLE - name: release_channel + mode: NULLABLE +- name: release_channel type: STRING -- mode: NULLABLE - name: position + mode: NULLABLE +- name: position type: INTEGER -- mode: NULLABLE - name: provider + mode: NULLABLE +- name: provider type: STRING -- mode: NULLABLE - name: match_type + mode: NULLABLE +- name: match_type type: STRING -- mode: NULLABLE - name: suggest_data_sharing_enabled + mode: NULLABLE +- name: suggest_data_sharing_enabled type: BOOLEAN -- mode: NULLABLE - name: impression_count + mode: NULLABLE +- name: impression_count type: INTEGER -- mode: NULLABLE - name: click_count + mode: NULLABLE +- name: click_count type: INTEGER -- mode: NULLABLE - name: query_type + mode: NULLABLE +- name: query_type type: STRING + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/debug_ping_view_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/debug_ping_view_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/debug_ping_view_derived/events_stream_v1/query.sql 2024-05-28 18:45:30.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/debug_ping_view_derived/events_stream_v1/query.sql 2024-05-28 18:47:27.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `debug_ping_view.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml 2024-05-28 18:44:56.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml 2024-05-28 18:53:29.000000000 +0000 @@ -26,6 +26,9 @@ - name: adjust_network type: STRING mode: NULLABLE +- name: install_source + type: STRING + mode: NULLABLE - name: retained_week_2 type: BOOLEAN mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml 2024-05-28 18:44:56.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml 2024-05-28 18:53:30.000000000 +0000 @@ -48,6 +48,10 @@ description: 'The type of source of a client installation. ' +- name: install_source + type: STRING + mode: NULLABLE + description: null - name: new_profiles type: INTEGER mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/events_stream_v1/query.sql 2024-05-28 18:45:31.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/events_stream_v1/query.sql 2024-05-28 18:47:27.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `firefox_desktop_background_defaultagent.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/event_monitoring_live_v1/materialized_view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/event_monitoring_live_v1/materialized_view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-28 18:45:31.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-28 18:47:25.000000000 +0000 @@ -50,7 +50,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_background_tasks_live.events_v1` + `moz-fx-data-shared-prod.firefox_desktop_background_tasks_live.background_tasks_v1` UNION ALL SELECT submission_timestamp, @@ -60,7 +60,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_background_tasks_live.background_tasks_v1` + `moz-fx-data-shared-prod.firefox_desktop_background_tasks_live.events_v1` ) CROSS JOIN UNNEST(events) AS event, diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/events_stream_v1/query.sql 2024-05-28 18:45:31.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/events_stream_v1/query.sql 2024-05-28 18:47:28.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `firefox_desktop_background_tasks.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/events_stream_v1/query.sql 2024-05-28 18:45:31.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/events_stream_v1/query.sql 2024-05-28 18:47:29.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `firefox_desktop_background_update.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/event_monitoring_live_v1/materialized_view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/event_monitoring_live_v1/materialized_view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-28 18:45:31.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-28 18:47:25.000000000 +0000 @@ -60,7 +60,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_live.events_v1` + `moz-fx-data-shared-prod.firefox_desktop_live.prototype_no_code_events_v1` UNION ALL SELECT submission_timestamp, @@ -70,7 +70,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_live.urlbar_potential_exposure_v1` + `moz-fx-data-shared-prod.firefox_desktop_live.events_v1` UNION ALL SELECT submission_timestamp, @@ -80,7 +80,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_live.prototype_no_code_events_v1` + `moz-fx-data-shared-prod.firefox_desktop_live.urlbar_potential_exposure_v1` ) CROSS JOIN UNNEST(events) AS event, diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/events_stream_v1/query.sql 2024-05-28 18:45:31.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/events_stream_v1/query.sql 2024-05-28 18:47:29.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.pars ```

⚠️ Only part of the diff is displayed.

Link to full diff

sean-rose commented 2 months ago

temp udfs turn a query into a script when running in a session

Is that a BigQuery bug?

BenWu commented 2 months ago

temp udfs turn a query into a script when running in a session

Is that a BigQuery bug?

I thought so at first but I think it's intended because temp udfs are session-scoped https://cloud.google.com/bigquery/docs/sessions-write-queries#use_temporary_functions_in_sessions

dataops-ci-bot commented 2 months ago

Integration report for "remove TEMP_UDF_PREFIX_RE_STR"

sql.diff

Click to expand! ```diff diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_backend_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_backend_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_backend_derived/events_stream_v1/query.sql 2024-05-28 23:13:34.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_backend_derived/events_stream_v1/query.sql 2024-05-28 23:15:33.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `accounts_backend.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/events_stream_v1/query.sql 2024-05-28 23:13:34.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/events_stream_v1/query.sql 2024-05-28 23:15:34.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `accounts_cirrus.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_frontend_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_frontend_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_frontend_derived/events_stream_v1/query.sql 2024-05-28 23:13:35.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_frontend_derived/events_stream_v1/query.sql 2024-05-28 23:15:35.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `accounts_frontend.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/event_monitoring_live_v1/materialized_view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/event_monitoring_live_v1/materialized_view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-28 23:13:35.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-28 23:15:32.000000000 +0000 @@ -50,7 +50,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.bedrock_live.events_v1` + `moz-fx-data-shared-prod.bedrock_live.non_interaction_v1` UNION ALL SELECT submission_timestamp, @@ -60,7 +60,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.bedrock_live.non_interaction_v1` + `moz-fx-data-shared-prod.bedrock_live.events_v1` UNION ALL SELECT submission_timestamp, diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/events_stream_v1/query.sql 2024-05-28 23:13:35.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/events_stream_v1/query.sql 2024-05-28 23:15:35.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `bedrock.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/events_stream_v1/query.sql 2024-05-28 23:13:35.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/events_stream_v1/query.sql 2024-05-28 23:15:34.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `burnham.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml 2024-05-28 23:12:45.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml 2024-05-28 23:22:13.000000000 +0000 @@ -1,49 +1,49 @@ fields: -- mode: NULLABLE - name: submission_date +- name: submission_date type: DATE -- mode: NULLABLE - name: source + mode: NULLABLE +- name: source type: STRING -- mode: NULLABLE - name: event_type + mode: NULLABLE +- name: event_type type: STRING -- mode: NULLABLE - name: form_factor + mode: NULLABLE +- name: form_factor type: STRING -- mode: NULLABLE - name: country + mode: NULLABLE +- name: country type: STRING -- mode: NULLABLE - name: subdivision1 + mode: NULLABLE +- name: subdivision1 type: STRING -- mode: NULLABLE - name: advertiser + mode: NULLABLE +- name: advertiser type: STRING -- mode: NULLABLE - name: release_channel + mode: NULLABLE +- name: release_channel type: STRING -- mode: NULLABLE - name: position + mode: NULLABLE +- name: position type: INTEGER -- mode: NULLABLE - name: provider + mode: NULLABLE +- name: provider type: STRING -- mode: NULLABLE - name: match_type + mode: NULLABLE +- name: match_type type: STRING -- mode: NULLABLE - name: normalized_os + mode: NULLABLE +- name: normalized_os type: STRING -- mode: NULLABLE - name: suggest_data_sharing_enabled + mode: NULLABLE +- name: suggest_data_sharing_enabled type: BOOLEAN -- mode: NULLABLE - name: event_count + mode: NULLABLE +- name: event_count type: INTEGER -- mode: NULLABLE - name: user_count + mode: NULLABLE +- name: user_count type: INTEGER -- mode: NULLABLE - name: query_type + mode: NULLABLE +- name: query_type type: STRING + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml 2024-05-28 23:12:45.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml 2024-05-28 23:22:12.000000000 +0000 @@ -1,40 +1,40 @@ fields: -- mode: NULLABLE - name: submission_date +- name: submission_date type: DATE -- mode: NULLABLE - name: form_factor + mode: NULLABLE +- name: form_factor type: STRING -- mode: NULLABLE - name: country + mode: NULLABLE +- name: country type: STRING -- mode: NULLABLE - name: advertiser + mode: NULLABLE +- name: advertiser type: STRING -- mode: NULLABLE - name: normalized_os + mode: NULLABLE +- name: normalized_os type: STRING -- mode: NULLABLE - name: release_channel + mode: NULLABLE +- name: release_channel type: STRING -- mode: NULLABLE - name: position + mode: NULLABLE +- name: position type: INTEGER -- mode: NULLABLE - name: provider + mode: NULLABLE +- name: provider type: STRING -- mode: NULLABLE - name: match_type + mode: NULLABLE +- name: match_type type: STRING -- mode: NULLABLE - name: suggest_data_sharing_enabled + mode: NULLABLE +- name: suggest_data_sharing_enabled type: BOOLEAN -- mode: NULLABLE - name: impression_count + mode: NULLABLE +- name: impression_count type: INTEGER -- mode: NULLABLE - name: click_count + mode: NULLABLE +- name: click_count type: INTEGER -- mode: NULLABLE - name: query_type + mode: NULLABLE +- name: query_type type: STRING + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/debug_ping_view_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/debug_ping_view_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/debug_ping_view_derived/events_stream_v1/query.sql 2024-05-28 23:13:35.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/debug_ping_view_derived/events_stream_v1/query.sql 2024-05-28 23:15:35.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `debug_ping_view.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml 2024-05-28 23:12:45.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml 2024-05-28 23:20:43.000000000 +0000 @@ -26,6 +26,9 @@ - name: adjust_network type: STRING mode: NULLABLE +- name: install_source + type: STRING + mode: NULLABLE - name: retained_week_2 type: BOOLEAN mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml 2024-05-28 23:12:45.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml 2024-05-28 23:20:43.000000000 +0000 @@ -48,6 +48,10 @@ description: 'The type of source of a client installation. ' +- name: install_source + type: STRING + mode: NULLABLE + description: null - name: new_profiles type: INTEGER mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/events_stream_v1/query.sql 2024-05-28 23:13:34.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/events_stream_v1/query.sql 2024-05-28 23:15:35.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `firefox_desktop_background_defaultagent.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/events_stream_v1/query.sql 2024-05-28 23:13:34.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/events_stream_v1/query.sql 2024-05-28 23:15:36.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `firefox_desktop_background_tasks.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/events_stream_v1/query.sql 2024-05-28 23:13:35.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/events_stream_v1/query.sql 2024-05-28 23:15:36.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `firefox_desktop_background_update.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/event_monitoring_live_v1/materialized_view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/event_monitoring_live_v1/materialized_view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-28 23:13:35.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-28 23:15:33.000000000 +0000 @@ -60,7 +60,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_live.events_v1` + `moz-fx-data-shared-prod.firefox_desktop_live.newtab_v1` UNION ALL SELECT submission_timestamp, @@ -70,7 +70,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_live.newtab_v1` + `moz-fx-data-shared-prod.firefox_desktop_live.prototype_no_code_events_v1` UNION ALL SELECT submission_timestamp, @@ -80,7 +80,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_live.prototype_no_code_events_v1` + `moz-fx-data-shared-prod.firefox_desktop_live.events_v1` ) CROSS JOIN UNNEST(events) AS event, diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/events_stream_v1/query.sql 2024-05-28 23:13:35.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/events_stream_v1/query.sql 2024-05-28 23:15:37.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `firefox_desktop.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - ```

⚠️ Only part of the diff is displayed.

Link to full diff

dataops-ci-bot commented 2 months ago

Integration report for "Merge branch 'main' into benwu/session-temp-udfs"

sql.diff

Click to expand! ```diff diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_backend_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_backend_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_backend_derived/events_stream_v1/query.sql 2024-05-29 14:25:13.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_backend_derived/events_stream_v1/query.sql 2024-05-29 14:27:25.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `accounts_backend.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/events_stream_v1/query.sql 2024-05-29 14:25:13.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/events_stream_v1/query.sql 2024-05-29 14:27:26.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `accounts_cirrus.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_frontend_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_frontend_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_frontend_derived/events_stream_v1/query.sql 2024-05-29 14:25:13.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_frontend_derived/events_stream_v1/query.sql 2024-05-29 14:27:26.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `accounts_frontend.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/event_monitoring_live_v1/materialized_view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/event_monitoring_live_v1/materialized_view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-29 14:25:13.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-29 14:27:24.000000000 +0000 @@ -50,7 +50,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.bedrock_live.events_v1` + `moz-fx-data-shared-prod.bedrock_live.non_interaction_v1` UNION ALL SELECT submission_timestamp, @@ -60,7 +60,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.bedrock_live.non_interaction_v1` + `moz-fx-data-shared-prod.bedrock_live.events_v1` UNION ALL SELECT submission_timestamp, diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/events_stream_v1/query.sql 2024-05-29 14:25:13.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/events_stream_v1/query.sql 2024-05-29 14:27:27.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `bedrock.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/events_stream_v1/query.sql 2024-05-29 14:25:13.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/events_stream_v1/query.sql 2024-05-29 14:27:26.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `burnham.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml 2024-05-29 14:24:40.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml 2024-05-29 14:33:57.000000000 +0000 @@ -1,49 +1,49 @@ fields: -- mode: NULLABLE - name: submission_date +- name: submission_date type: DATE -- mode: NULLABLE - name: source + mode: NULLABLE +- name: source type: STRING -- mode: NULLABLE - name: event_type + mode: NULLABLE +- name: event_type type: STRING -- mode: NULLABLE - name: form_factor + mode: NULLABLE +- name: form_factor type: STRING -- mode: NULLABLE - name: country + mode: NULLABLE +- name: country type: STRING -- mode: NULLABLE - name: subdivision1 + mode: NULLABLE +- name: subdivision1 type: STRING -- mode: NULLABLE - name: advertiser + mode: NULLABLE +- name: advertiser type: STRING -- mode: NULLABLE - name: release_channel + mode: NULLABLE +- name: release_channel type: STRING -- mode: NULLABLE - name: position + mode: NULLABLE +- name: position type: INTEGER -- mode: NULLABLE - name: provider + mode: NULLABLE +- name: provider type: STRING -- mode: NULLABLE - name: match_type + mode: NULLABLE +- name: match_type type: STRING -- mode: NULLABLE - name: normalized_os + mode: NULLABLE +- name: normalized_os type: STRING -- mode: NULLABLE - name: suggest_data_sharing_enabled + mode: NULLABLE +- name: suggest_data_sharing_enabled type: BOOLEAN -- mode: NULLABLE - name: event_count + mode: NULLABLE +- name: event_count type: INTEGER -- mode: NULLABLE - name: user_count + mode: NULLABLE +- name: user_count type: INTEGER -- mode: NULLABLE - name: query_type + mode: NULLABLE +- name: query_type type: STRING + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml 2024-05-29 14:24:40.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml 2024-05-29 14:33:57.000000000 +0000 @@ -1,40 +1,40 @@ fields: -- mode: NULLABLE - name: submission_date +- name: submission_date type: DATE -- mode: NULLABLE - name: form_factor + mode: NULLABLE +- name: form_factor type: STRING -- mode: NULLABLE - name: country + mode: NULLABLE +- name: country type: STRING -- mode: NULLABLE - name: advertiser + mode: NULLABLE +- name: advertiser type: STRING -- mode: NULLABLE - name: normalized_os + mode: NULLABLE +- name: normalized_os type: STRING -- mode: NULLABLE - name: release_channel + mode: NULLABLE +- name: release_channel type: STRING -- mode: NULLABLE - name: position + mode: NULLABLE +- name: position type: INTEGER -- mode: NULLABLE - name: provider + mode: NULLABLE +- name: provider type: STRING -- mode: NULLABLE - name: match_type + mode: NULLABLE +- name: match_type type: STRING -- mode: NULLABLE - name: suggest_data_sharing_enabled + mode: NULLABLE +- name: suggest_data_sharing_enabled type: BOOLEAN -- mode: NULLABLE - name: impression_count + mode: NULLABLE +- name: impression_count type: INTEGER -- mode: NULLABLE - name: click_count + mode: NULLABLE +- name: click_count type: INTEGER -- mode: NULLABLE - name: query_type + mode: NULLABLE +- name: query_type type: STRING + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/debug_ping_view_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/debug_ping_view_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/debug_ping_view_derived/events_stream_v1/query.sql 2024-05-29 14:25:13.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/debug_ping_view_derived/events_stream_v1/query.sql 2024-05-29 14:27:27.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `debug_ping_view.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml 2024-05-29 14:24:40.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml 2024-05-29 14:33:05.000000000 +0000 @@ -26,6 +26,9 @@ - name: adjust_network type: STRING mode: NULLABLE +- name: install_source + type: STRING + mode: NULLABLE - name: retained_week_2 type: BOOLEAN mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml 2024-05-29 14:24:40.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml 2024-05-29 14:33:06.000000000 +0000 @@ -48,6 +48,10 @@ description: 'The type of source of a client installation. ' +- name: install_source + type: STRING + mode: NULLABLE + description: null - name: new_profiles type: INTEGER mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/events_stream_v1/query.sql 2024-05-29 14:25:13.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/events_stream_v1/query.sql 2024-05-29 14:27:27.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `firefox_desktop_background_defaultagent.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/event_monitoring_live_v1/materialized_view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/event_monitoring_live_v1/materialized_view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-29 14:25:13.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-29 14:27:25.000000000 +0000 @@ -50,7 +50,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_background_tasks_live.events_v1` + `moz-fx-data-shared-prod.firefox_desktop_background_tasks_live.background_tasks_v1` UNION ALL SELECT submission_timestamp, @@ -60,7 +60,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_background_tasks_live.background_tasks_v1` + `moz-fx-data-shared-prod.firefox_desktop_background_tasks_live.events_v1` ) CROSS JOIN UNNEST(events) AS event, diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/events_stream_v1/query.sql 2024-05-29 14:25:13.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/events_stream_v1/query.sql 2024-05-29 14:27:28.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `firefox_desktop_background_tasks.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/events_stream_v1/query.sql 2024-05-29 14:25:13.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/events_stream_v1/query.sql 2024-05-29 14:27:29.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - TO_JSON(metrics) AS metrics + metrics_to_json(TO_JSON(metrics)) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - -- convert array of key value pairs to a json object - -- values are nested structs and will be converted to json objects - IF( - ARRAY_LENGTH(ping_info.experiments) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), - ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) - ) - ) AS experiments, + from_map_experiment(ping_info.experiments) AS experiments, FROM `firefox_desktop_background_update.events` WHERE @@ -48,40 +105,10 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} -), -json_metrics AS ( - SELECT - * REPLACE ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) AS metrics - ) - FROM - base ) -- SELECT - json_metrics.* EXCEPT (events), + base.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -89,27 +116,8 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - -- convert array of key value pairs to a json object, cast numbers and booleans if possible - IF( - ARRAY_LENGTH(event.extra) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(event.extra)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(event.extra) - ) - ) - ) AS event_extra, + from_map_event_extra(event.extra) AS event_extra, FROM - json_metrics + base CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/event_monitoring_live_v1/materialized_view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/event_monitoring_live_v1/materialized_view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-29 14:25:13.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-29 14:27:25.000000000 +0000 @@ -50,7 +50,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_live.events_v1` + `moz-fx-data-shared-prod.firefox_desktop_live.newtab_v1` UNION ALL SELECT submission_timestamp, @@ -60,7 +60,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_live.newtab_v1` + `moz-fx-data-shared-prod.firefox_desktop_live.prototype_no_code_events_v1` UNION ALL SELECT submission_timestamp, @@ -70,7 +70,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_live.prototype_no_code_events_v1` + `moz-fx-data-shared-prod.firefox_desktop_live.urlbar_potential_exposure_v1` UNION ALL SELECT submission_timestamp, @@ -80,7 +80,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_live.urlbar_potential_exposure_v1` + `moz-fx-data-shared-prod.firefox_desktop_live.events_v1` ) CROSS JOIN UNNEST(events) AS event, diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/events_stream_v1/query.sql 2024-05-29 14:25:13.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/events_stream_v1/query.sql 2024-05-29 14:27:29.000000000 +0000 @@ -1,4 +1,70 @@ -- Generated via bigquery_etl.glean_usage +-- convert array of key value pairs to a json object, cast numbers and booleans if possible +CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(input)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(input) + ) + ) + ) +); + +-- convert array of key value pairs to a json object +-- values are nested structs and will be converted to json objects +CREATE TEMP FUNCTION from_map_experiment( + input ARRAY< + STRUCT>> + > +) +RETURNS json AS ( + IF( + ARRAY_LENGTH(input) = 0, + NULL, + JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) + ) +); + +CREATE TEMP FUNCTION metrics_to_json(metrics JSON) +RETURNS JSON AS ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) +); + WITH base AS ( SELECT * REPLACE ( @@ -26,20 +92,11 @@ ```

⚠️ Only part of the diff is displayed.

Link to full diff

quiiver commented 2 months ago

a little late on this but is there a reason to keep these udfs as temporary vs adding them to our udf deployment?

BenWu commented 1 month ago

Do you mean generally or specifically for the events stream udfs? Generally, temp udfs do something specific to a single query so it doesn't make sense to share them. I think that applies to the events stream udfs because I wrote them with the data types in those columns i mind. I couldn't come up with one that works generally like JSON.parse which is why I wrote two. I think it would be confusing to put them in the udf datasets because the error out if not being used in these specific cases