mozilla / bigquery-etl

Bigquery ETL
https://mozilla.github.io/bigquery-etl
Mozilla Public License 2.0
246 stars 98 forks source link

[DENG-3889] Replace events_stream temp udfs with inline usages #5666

Closed BenWu closed 2 months ago

BenWu commented 2 months ago

Following up on https://github.com/mozilla/bigquery-etl/pull/5659, I found the billing project option doesn't work when there are temp udfs (DENG-3905). I'm working on a fix for that but I want to do this in the meantime to get the query working again.

Checklist for reviewer:

For modifications to schemas in restricted namespaces (see CODEOWNERS):

┆Issue is synchronized with this Jira Task

dataops-ci-bot commented 2 months ago

Integration report for "DENG-3889 Replace temp udfs with inline usages"

sql.diff

Click to expand! ```diff Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_ios/clients_last_seen_joined: schema.yaml Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/org_mozilla_mozregression/baseline_clients_last_seen: schema.yaml diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_backend_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_backend_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_backend_derived/events_stream_v1/query.sql 2024-05-27 22:37:40.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_backend_derived/events_stream_v1/query.sql 2024-05-27 22:40:48.000000000 +0000 @@ -1,70 +1,4 @@ -- Generated via bigquery_etl.glean_usage --- convert array of key value pairs to a json object, cast numbers and booleans if possible -CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) -RETURNS json AS ( - IF( - ARRAY_LENGTH(input) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(input)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(input) - ) - ) - ) -); - --- convert array of key value pairs to a json object --- values are nested structs and will be converted to json objects -CREATE TEMP FUNCTION from_map_experiment( - input ARRAY< - STRUCT>> - > -) -RETURNS json AS ( - IF( - ARRAY_LENGTH(input) = 0, - NULL, - JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) - ) -); - -CREATE TEMP FUNCTION metrics_to_json(metrics JSON) -RETURNS JSON AS ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) -); - WITH base AS ( SELECT * REPLACE ( @@ -92,11 +26,20 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - metrics_to_json(TO_JSON(metrics)) AS metrics + TO_JSON(metrics) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - from_map_experiment(ping_info.experiments) AS experiments, + -- convert array of key value pairs to a json object + -- values are nested structs and will be converted to json objects + IF( + ARRAY_LENGTH(ping_info.experiments) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), + ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) + ) + ) AS experiments, FROM `accounts_backend.events` WHERE @@ -105,10 +48,40 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} +), +json_metrics AS ( + SELECT + * REPLACE ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) AS metrics + ) + FROM + base ) -- SELECT - base.* EXCEPT (events), + json_metrics.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -116,8 +89,27 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - from_map_event_extra(event.extra) AS event_extra, + -- convert array of key value pairs to a json object, cast numbers and booleans if possible + IF( + ARRAY_LENGTH(event.extra) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(event.extra)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(event.extra) + ) + ) + ) AS event_extra, FROM - base + json_metrics CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/events_stream_v1/query.sql 2024-05-27 22:37:40.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/events_stream_v1/query.sql 2024-05-27 22:40:49.000000000 +0000 @@ -1,70 +1,4 @@ -- Generated via bigquery_etl.glean_usage --- convert array of key value pairs to a json object, cast numbers and booleans if possible -CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) -RETURNS json AS ( - IF( - ARRAY_LENGTH(input) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(input)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(input) - ) - ) - ) -); - --- convert array of key value pairs to a json object --- values are nested structs and will be converted to json objects -CREATE TEMP FUNCTION from_map_experiment( - input ARRAY< - STRUCT>> - > -) -RETURNS json AS ( - IF( - ARRAY_LENGTH(input) = 0, - NULL, - JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) - ) -); - -CREATE TEMP FUNCTION metrics_to_json(metrics JSON) -RETURNS JSON AS ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) -); - WITH base AS ( SELECT * REPLACE ( @@ -92,11 +26,20 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - metrics_to_json(TO_JSON(metrics)) AS metrics + TO_JSON(metrics) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - from_map_experiment(ping_info.experiments) AS experiments, + -- convert array of key value pairs to a json object + -- values are nested structs and will be converted to json objects + IF( + ARRAY_LENGTH(ping_info.experiments) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), + ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) + ) + ) AS experiments, FROM `accounts_cirrus.events` WHERE @@ -105,10 +48,40 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} +), +json_metrics AS ( + SELECT + * REPLACE ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) AS metrics + ) + FROM + base ) -- SELECT - base.* EXCEPT (events), + json_metrics.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -116,8 +89,27 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - from_map_event_extra(event.extra) AS event_extra, + -- convert array of key value pairs to a json object, cast numbers and booleans if possible + IF( + ARRAY_LENGTH(event.extra) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(event.extra)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(event.extra) + ) + ) + ) AS event_extra, FROM - base + json_metrics CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_frontend_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_frontend_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_frontend_derived/events_stream_v1/query.sql 2024-05-27 22:37:40.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_frontend_derived/events_stream_v1/query.sql 2024-05-27 22:40:50.000000000 +0000 @@ -1,70 +1,4 @@ -- Generated via bigquery_etl.glean_usage --- convert array of key value pairs to a json object, cast numbers and booleans if possible -CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) -RETURNS json AS ( - IF( - ARRAY_LENGTH(input) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(input)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(input) - ) - ) - ) -); - --- convert array of key value pairs to a json object --- values are nested structs and will be converted to json objects -CREATE TEMP FUNCTION from_map_experiment( - input ARRAY< - STRUCT>> - > -) -RETURNS json AS ( - IF( - ARRAY_LENGTH(input) = 0, - NULL, - JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) - ) -); - -CREATE TEMP FUNCTION metrics_to_json(metrics JSON) -RETURNS JSON AS ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) -); - WITH base AS ( SELECT * REPLACE ( @@ -92,11 +26,20 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - metrics_to_json(TO_JSON(metrics)) AS metrics + TO_JSON(metrics) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - from_map_experiment(ping_info.experiments) AS experiments, + -- convert array of key value pairs to a json object + -- values are nested structs and will be converted to json objects + IF( + ARRAY_LENGTH(ping_info.experiments) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), + ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) + ) + ) AS experiments, FROM `accounts_frontend.events` WHERE @@ -105,10 +48,40 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} +), +json_metrics AS ( + SELECT + * REPLACE ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) AS metrics + ) + FROM + base ) -- SELECT - base.* EXCEPT (events), + json_metrics.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -116,8 +89,27 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - from_map_event_extra(event.extra) AS event_extra, + -- convert array of key value pairs to a json object, cast numbers and booleans if possible + IF( + ARRAY_LENGTH(event.extra) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(event.extra)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(event.extra) + ) + ) + ) AS event_extra, FROM - base + json_metrics CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/event_monitoring_live_v1/materialized_view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/event_monitoring_live_v1/materialized_view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-27 22:37:40.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-27 22:40:47.000000000 +0000 @@ -50,7 +50,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.bedrock_live.interaction_v1` + `moz-fx-data-shared-prod.bedrock_live.non_interaction_v1` UNION ALL SELECT submission_timestamp, @@ -60,7 +60,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.bedrock_live.non_interaction_v1` + `moz-fx-data-shared-prod.bedrock_live.interaction_v1` UNION ALL SELECT submission_timestamp, diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/events_stream_v1/query.sql 2024-05-27 22:37:40.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/events_stream_v1/query.sql 2024-05-27 22:40:51.000000000 +0000 @@ -1,70 +1,4 @@ -- Generated via bigquery_etl.glean_usage --- convert array of key value pairs to a json object, cast numbers and booleans if possible -CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) -RETURNS json AS ( - IF( - ARRAY_LENGTH(input) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(input)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(input) - ) - ) - ) -); - --- convert array of key value pairs to a json object --- values are nested structs and will be converted to json objects -CREATE TEMP FUNCTION from_map_experiment( - input ARRAY< - STRUCT>> - > -) -RETURNS json AS ( - IF( - ARRAY_LENGTH(input) = 0, - NULL, - JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) - ) -); - -CREATE TEMP FUNCTION metrics_to_json(metrics JSON) -RETURNS JSON AS ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) -); - WITH base AS ( SELECT * REPLACE ( @@ -92,11 +26,20 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - metrics_to_json(TO_JSON(metrics)) AS metrics + TO_JSON(metrics) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - from_map_experiment(ping_info.experiments) AS experiments, + -- convert array of key value pairs to a json object + -- values are nested structs and will be converted to json objects + IF( + ARRAY_LENGTH(ping_info.experiments) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), + ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) + ) + ) AS experiments, FROM `bedrock.events` WHERE @@ -105,10 +48,40 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} +), +json_metrics AS ( + SELECT + * REPLACE ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) AS metrics + ) + FROM + base ) -- SELECT - base.* EXCEPT (events), + json_metrics.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -116,8 +89,27 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - from_map_event_extra(event.extra) AS event_extra, + -- convert array of key value pairs to a json object, cast numbers and booleans if possible + IF( + ARRAY_LENGTH(event.extra) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(event.extra)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(event.extra) + ) + ) + ) AS event_extra, FROM - base + json_metrics CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/events_stream_v1/query.sql 2024-05-27 22:37:40.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/events_stream_v1/query.sql 2024-05-27 22:40:49.000000000 +0000 @@ -1,70 +1,4 @@ -- Generated via bigquery_etl.glean_usage --- convert array of key value pairs to a json object, cast numbers and booleans if possible -CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) -RETURNS json AS ( - IF( - ARRAY_LENGTH(input) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(input)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(input) - ) - ) - ) -); - --- convert array of key value pairs to a json object --- values are nested structs and will be converted to json objects -CREATE TEMP FUNCTION from_map_experiment( - input ARRAY< - STRUCT>> - > -) -RETURNS json AS ( - IF( - ARRAY_LENGTH(input) = 0, - NULL, - JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) - ) -); - -CREATE TEMP FUNCTION metrics_to_json(metrics JSON) -RETURNS JSON AS ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) -); - WITH base AS ( SELECT * REPLACE ( @@ -92,11 +26,20 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - metrics_to_json(TO_JSON(metrics)) AS metrics + TO_JSON(metrics) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - from_map_experiment(ping_info.experiments) AS experiments, + -- convert array of key value pairs to a json object + -- values are nested structs and will be converted to json objects + IF( + ARRAY_LENGTH(ping_info.experiments) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), + ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) + ) + ) AS experiments, FROM `burnham.events` WHERE @@ -105,10 +48,40 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} +), +json_metrics AS ( + SELECT + * REPLACE ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) AS metrics + ) + FROM + base ) -- SELECT - base.* EXCEPT (events), + json_metrics.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -116,8 +89,27 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - from_map_event_extra(event.extra) AS event_extra, + -- convert array of key value pairs to a json object, cast numbers and booleans if possible + IF( + ARRAY_LENGTH(event.extra) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(event.extra)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(event.extra) + ) + ) + ) AS event_extra, FROM - base + json_metrics CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml 2024-05-27 22:37:05.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml 2024-05-27 22:47:02.000000000 +0000 @@ -1,49 +1,49 @@ fields: -- mode: NULLABLE - name: submission_date +- name: submission_date type: DATE -- mode: NULLABLE - name: source + mode: NULLABLE +- name: source type: STRING -- mode: NULLABLE - name: event_type + mode: NULLABLE +- name: event_type type: STRING -- mode: NULLABLE - name: form_factor + mode: NULLABLE +- name: form_factor type: STRING -- mode: NULLABLE - name: country + mode: NULLABLE +- name: country type: STRING -- mode: NULLABLE - name: subdivision1 + mode: NULLABLE +- name: subdivision1 type: STRING -- mode: NULLABLE - name: advertiser + mode: NULLABLE +- name: advertiser type: STRING -- mode: NULLABLE - name: release_channel + mode: NULLABLE +- name: release_channel type: STRING -- mode: NULLABLE - name: position + mode: NULLABLE +- name: position type: INTEGER -- mode: NULLABLE - name: provider + mode: NULLABLE +- name: provider type: STRING -- mode: NULLABLE - name: match_type + mode: NULLABLE +- name: match_type type: STRING -- mode: NULLABLE - name: normalized_os + mode: NULLABLE +- name: normalized_os type: STRING -- mode: NULLABLE - name: suggest_data_sharing_enabled + mode: NULLABLE +- name: suggest_data_sharing_enabled type: BOOLEAN -- mode: NULLABLE - name: event_count + mode: NULLABLE +- name: event_count type: INTEGER -- mode: NULLABLE - name: user_count + mode: NULLABLE +- name: user_count type: INTEGER -- mode: NULLABLE - name: query_type + mode: NULLABLE +- name: query_type type: STRING + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml 2024-05-27 22:37:05.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml 2024-05-27 22:47:02.000000000 +0000 @@ -1,40 +1,40 @@ fields: -- mode: NULLABLE - name: submission_date +- name: submission_date type: DATE -- mode: NULLABLE - name: form_factor + mode: NULLABLE +- name: form_factor type: STRING -- mode: NULLABLE - name: country + mode: NULLABLE +- name: country type: STRING -- mode: NULLABLE - name: advertiser + mode: NULLABLE +- name: advertiser type: STRING -- mode: NULLABLE - name: normalized_os + mode: NULLABLE +- name: normalized_os type: STRING -- mode: NULLABLE - name: release_channel + mode: NULLABLE +- name: release_channel type: STRING -- mode: NULLABLE - name: position + mode: NULLABLE +- name: position type: INTEGER -- mode: NULLABLE - name: provider + mode: NULLABLE +- name: provider type: STRING -- mode: NULLABLE - name: match_type + mode: NULLABLE +- name: match_type type: STRING -- mode: NULLABLE - name: suggest_data_sharing_enabled + mode: NULLABLE +- name: suggest_data_sharing_enabled type: BOOLEAN -- mode: NULLABLE - name: impression_count + mode: NULLABLE +- name: impression_count type: INTEGER -- mode: NULLABLE - name: click_count + mode: NULLABLE +- name: click_count type: INTEGER -- mode: NULLABLE - name: query_type + mode: NULLABLE +- name: query_type type: STRING + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/debug_ping_view_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/debug_ping_view_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/debug_ping_view_derived/events_stream_v1/query.sql 2024-05-27 22:37:41.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/debug_ping_view_derived/events_stream_v1/query.sql 2024-05-27 22:40:50.000000000 +0000 @@ -1,70 +1,4 @@ -- Generated via bigquery_etl.glean_usage --- convert array of key value pairs to a json object, cast numbers and booleans if possible -CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) -RETURNS json AS ( - IF( - ARRAY_LENGTH(input) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(input)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(input) - ) - ) - ) -); - --- convert array of key value pairs to a json object --- values are nested structs and will be converted to json objects -CREATE TEMP FUNCTION from_map_experiment( - input ARRAY< - STRUCT>> - > -) -RETURNS json AS ( - IF( - ARRAY_LENGTH(input) = 0, - NULL, - JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) - ) -); - -CREATE TEMP FUNCTION metrics_to_json(metrics JSON) -RETURNS JSON AS ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) -); - WITH base AS ( SELECT * REPLACE ( @@ -92,11 +26,20 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - metrics_to_json(TO_JSON(metrics)) AS metrics + TO_JSON(metrics) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - from_map_experiment(ping_info.experiments) AS experiments, + -- convert array of key value pairs to a json object + -- values are nested structs and will be converted to json objects + IF( + ARRAY_LENGTH(ping_info.experiments) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), + ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) + ) + ) AS experiments, FROM `debug_ping_view.events` WHERE @@ -105,10 +48,40 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} +), +json_metrics AS ( + SELECT + * REPLACE ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) AS metrics + ) + FROM + base ) -- SELECT - base.* EXCEPT (events), + json_metrics.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -116,8 +89,27 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - from_map_event_extra(event.extra) AS event_extra, + -- convert array of key value pairs to a json object, cast numbers and booleans if possible + IF( + ARRAY_LENGTH(event.extra) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(event.extra)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(event.extra) + ) + ) + ) AS event_extra, FROM - base + json_metrics CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml 2024-05-27 22:37:05.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml 2024-05-27 22:45:55.000000000 +0000 @@ -26,6 +26,9 @@ - name: adjust_network type: STRING mode: NULLABLE +- name: install_source + type: STRING + mode: NULLABLE - name: retained_week_2 type: BOOLEAN mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml 2024-05-27 22:37:05.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml 2024-05-27 22:45:54.000000000 +0000 @@ -48,6 +48,10 @@ description: 'The type of source of a client installation. ' +- name: install_source + type: STRING + mode: NULLABLE + description: null - name: new_profiles type: INTEGER mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/events_stream_v1/query.sql 2024-05-27 22:37:40.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/events_stream_v1/query.sql 2024-05-27 22:40:51.000000000 +0000 @@ -1,70 +1,4 @@ -- Generated via bigquery_etl.glean_usage --- convert array of key value pairs to a json object, cast numbers and booleans if possible -CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) -RETURNS json AS ( - IF( - ARRAY_LENGTH(input) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(input)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(input) - ) - ) - ) -); - --- convert array of key value pairs to a json object --- values are nested structs and will be converted to json objects -CREATE TEMP FUNCTION from_map_experiment( - input ARRAY< - STRUCT>> - > -) -RETURNS json AS ( - IF( - ARRAY_LENGTH(input) = 0, - NULL, - JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) - ) -); - -CREATE TEMP FUNCTION metrics_to_json(metrics JSON) -RETURNS JSON AS ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) -); - WITH base AS ( SELECT * REPLACE ( @@ -92,11 +26,20 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - metrics_to_json(TO_JSON(metrics)) AS metrics + TO_JSON(metrics) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - from_map_experiment(ping_info.experiments) AS experiments, + -- convert array of key value pairs to a json object + -- values are nested structs and will be converted to json objects + IF( + ARRAY_LENGTH(ping_info.experiments) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), + ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) + ) + ) AS experiments, FROM `firefox_desktop_background_defaultagent.events` WHERE @@ -105,10 +48,40 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} +), +json_metrics AS ( + SELECT + * REPLACE ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) AS metrics + ) + FROM + base ) -- SELECT - base.* EXCEPT (events), + json_metrics.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -116,8 +89,27 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - from_map_event_extra(event.extra) AS event_extra, + -- convert array of key value pairs to a json object, cast numbers and booleans if possible + IF( + ARRAY_LENGTH(event.extra) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(event.extra)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(event.extra) + ) + ) + ) AS event_extra, FROM - base + json_metrics CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/events_stream_v1/query.sql 2024-05-27 22:37:40.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/events_stream_v1/query.sql 2024-05-27 22:40:52.000000000 +0000 @@ -1,70 +1,4 @@ -- Generated via bigquery_etl.glean_usage --- convert array of key value pairs to a json object, cast numbers and booleans if possible -CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) -RETURNS json AS ( - IF( - ARRAY_LENGTH(input) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(input)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(input) - ) - ) - ) -); - --- convert array of key value pairs to a json object --- values are nested structs and will be converted to json objects -CREATE TEMP FUNCTION from_map_experiment( - input ARRAY< - STRUCT>> - > -) -RETURNS json AS ( - IF( - ARRAY_LENGTH(input) = 0, - NULL, - JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) - ) -); - -CREATE TEMP FUNCTION metrics_to_json(metrics JSON) -RETURNS JSON AS ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) -); - WITH base AS ( SELECT * REPLACE ( @@ -92,11 +26,20 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - metrics_to_json(TO_JSON(metrics)) AS metrics + TO_JSON(metrics) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - from_map_experiment(ping_info.experiments) AS experiments, + -- convert array of key value pairs to a json object + -- values are nested structs and will be converted to json objects + IF( + ARRAY_LENGTH(ping_info.experiments) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), + ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) + ) + ) AS experiments, FROM `firefox_desktop_background_tasks.events` WHERE @@ -105,10 +48,40 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} +), +json_metrics AS ( + SELECT + * REPLACE ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) AS metrics + ) + FROM + base ) -- SELECT - base.* EXCEPT (events), + json_metrics.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -116,8 +89,27 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - from_map_event_extra(event.extra) AS event_extra, + -- convert array of key value pairs to a json object, cast numbers and booleans if possible + IF( + ARRAY_LENGTH(event.extra) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(event.extra)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(event.extra) + ) + ) + ) AS event_extra, FROM - base + json_metrics CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/events_stream_v1/query.sql 2024-05-27 22:37:41.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/events_stream_v1/query.sql 2024-05-27 22:40:52.000000000 +0000 @@ -1,70 +1,4 @@ -- Generated via bigquery_etl.glean_usage --- convert array of key value pairs to a json object, cast numbers and booleans if possible -CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) -RETURNS json AS ( - IF( - ARRAY_LENGTH(input) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(input)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(input) - ) - ) - ) -); - --- convert array of key value pairs to a json object --- values are nested structs and will be converted to json objects -CREATE TEMP FUNCTION from_map_experiment( - input ARRAY< - STRUCT>> - > -) -RETURNS json AS ( - IF( - ARRAY_LENGTH(input) = 0, - NULL, - JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) - ) -); - -CREATE TEMP FUNCTION metrics_to_json(metrics JSON) -RETURNS JSON AS ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) -); - WITH base AS ( SELECT * REPLACE ( @@ -92,11 +26,20 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - metrics_to_json(TO_JSON(metrics)) AS metrics + TO_JSON(metrics) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - from_map_experiment(ping_info.experiments) AS experiments, + -- convert array of key value pairs to a json object + -- values are nested structs and will be converted to json objects + IF( + ARRAY_LENGTH(ping_info.experiments) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), + ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) + ) + ) AS experiments, FROM `firefox_desktop_background_update.events` WHERE @@ -105,10 +48,40 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} +), +json_metrics AS ( + SELECT + * REPLACE ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested_map(metrics.labeled_counter), + '$.labeled_string', + mozfun.json.from_nested_map(metrics.labeled_string), + '$.labeled_boolean', + mozfun.json.from_nested_map(metrics.labeled_boolean), + '$.url', + metrics.url2 + ), + '$.url2' + ), + remove_empty => TRUE + ) AS metrics + ) + FROM + base ) -- SELECT - base.* EXCEPT (events), + json_metrics.* EXCEPT (events), COALESCE( SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)), SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND) @@ -116,8 +89,27 @@ event.category AS event_category, event.name AS event_name, ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better - from_map_event_extra(event.extra) AS event_extra, + -- convert array of key value pairs to a json object, cast numbers and booleans if possible + IF( + ARRAY_LENGTH(event.extra) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(event.extra)), + ARRAY( + SELECT + CASE + WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) + WHEN SAFE_CAST(value AS BOOL) IS NOT NULL + THEN TO_JSON(SAFE_CAST(value AS BOOL)) + ELSE TO_JSON(value) + END + FROM + UNNEST(event.extra) + ) + ) + ) AS event_extra, FROM - base + json_metrics CROSS JOIN UNNEST(events) AS event diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/event_monitoring_live_v1/materialized_view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/event_monitoring_live_v1/materialized_view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-27 22:37:40.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-27 22:40:48.000000000 +0000 @@ -50,7 +50,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_live.prototype_no_code_events_v1` + `moz-fx-data-shared-prod.firefox_desktop_live.newtab_v1` UNION ALL SELECT submission_timestamp, @@ -60,7 +60,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_live.events_v1` + `moz-fx-data-shared-prod.firefox_desktop_live.prototype_no_code_events_v1` UNION ALL SELECT submission_timestamp, @@ -80,7 +80,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_live.newtab_v1` + `moz-fx-data-shared-prod.firefox_desktop_live.events_v1` ) CROSS JOIN UNNEST(events) AS event, diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/events_stream_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/events_stream_v1/query.sql 2024-05-27 22:37:40.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/events_stream_v1/query.sql 2024-05-27 22:40:53.000000000 +0000 @@ -1,70 +1,4 @@ -- Generated via bigquery_etl.glean_usage --- convert array of key value pairs to a json object, cast numbers and booleans if possible -CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>) -RETURNS json AS ( - IF( - ARRAY_LENGTH(input) = 0, - NULL, - JSON_OBJECT( - ARRAY(SELECT key FROM UNNEST(input)), - ARRAY( - SELECT - CASE - WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS NUMERIC)) - WHEN SAFE_CAST(value AS BOOL) IS NOT NULL - THEN TO_JSON(SAFE_CAST(value AS BOOL)) - ELSE TO_JSON(value) - END - FROM - UNNEST(input) - ) - ) - ) -); - --- convert array of key value pairs to a json object --- values are nested structs and will be converted to json objects -CREATE TEMP FUNCTION from_map_experiment( - input ARRAY< - STRUCT>> - > -) -RETURNS json AS ( - IF( - ARRAY_LENGTH(input) = 0, - NULL, - JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input))) - ) -); - -CREATE TEMP FUNCTION metrics_to_json(metrics JSON) -RETURNS JSON AS ( - JSON_STRIP_NULLS( - JSON_REMOVE( - -- labeled_* are the only ones that SHOULD show up as context for events pings, - -- thus we special-case them - -- - -- The JSON_SET/JSON_EXTRACT shenanigans are needed - -- because those subfields might not exist, so accessing the columns would fail. - -- but accessing non-existent fields in a JSON object simply gives us NULL. - JSON_SET( - metrics, - '$.labeled_counter', - mozfun.json.from_nested_map(metrics.labeled_counter), - '$.labeled_string', - mozfun.json.from_nested_map(metrics.labeled_string), - '$.labeled_boolean', - mozfun.json.from_nested_map(metrics.labeled_boolean), - '$.url', - metrics.url2 - ), - '$.url2' - ), - remove_empty => TRUE - ) -); - WITH base AS ( SELECT * REPLACE ( @@ -92,11 +26,20 @@ ping_info.parsed_end_time, ping_info.ping_type ) AS ping_info, - metrics_to_json(TO_JSON(metrics)) AS metrics + TO_JSON(metrics) AS metrics ), client_info.client_id AS client_id, ping_info.reason AS reason, - from_map_experiment(ping_info.experiments) AS experiments, + -- convert array of key value pairs to a json object + -- values are nested structs and will be converted to json objects + IF( + ARRAY_LENGTH(ping_info.experiments) = 0, + NULL, + JSON_OBJECT( + ARRAY(SELECT key FROM UNNEST(ping_info.experiments)), + ARRAY(SELECT value FROM UNNEST(ping_info.experiments)) + ) + ) AS experiments, FROM `firefox_desktop.events` WHERE @@ -105,10 +48,40 @@ {% else %} DATE(submission_timestamp) = @submission_date {% endif %} +), +json_metrics AS ( + SELECT + * REPLACE ( + JSON_STRIP_NULLS( + JSON_REMOVE( + -- labeled_* are the only ones that SHOULD show up as context for events pings, + -- thus we special-case them + -- + -- The JSON_SET/JSON_EXTRACT shenanigans are needed + -- because those subfields might not exist, so accessing the columns would fail. + -- but accessing non-existent fields in a JSON object simply gives us NULL. + JSON_SET( + metrics, + '$.labeled_counter', + mozfun.json.from_nested ```

⚠️ Only part of the diff is displayed.

Link to full diff