Following up on https://github.com/mozilla/bigquery-etl/pull/5659, I found the billing project option doesn't work when there are temp udfs (DENG-3905). I'm working on a fix for that but I want to do this in the meantime to get the query working again.
Checklist for reviewer:
[ ] Commits should reference a bug or github issue, if relevant (if a bug is referenced, the pull request should include the bug number in the title).
[ ] If the PR comes from a fork, trigger integration CI tests by running the Push to upstream workflow and provide the <username>:<branch> of the fork as parameter. The parameter will also show up
in the logs of the manual-trigger-required-for-fork CI task together with more detailed instructions.
[ ] If adding a new field to a query, ensure that the schema and dependent downstream schemas have been updated.
[ ] When adding a new derived dataset, ensure that data is not available already (fully or partially) and recommend extending an existing dataset in favor of creating new ones. Data can be available in the bigquery-etl repository, looker-hub or in looker-spoke-default.
For modifications to schemas in restricted namespaces (see CODEOWNERS):
Integration report for "DENG-3889 Replace temp udfs with inline usages"
sql.diff
Click to expand!
```diff
Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_ios/clients_last_seen_joined: schema.yaml
Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/org_mozilla_mozregression/baseline_clients_last_seen: schema.yaml
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_backend_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_backend_derived/events_stream_v1/query.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_backend_derived/events_stream_v1/query.sql 2024-05-27 22:37:40.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_backend_derived/events_stream_v1/query.sql 2024-05-27 22:40:48.000000000 +0000
@@ -1,70 +1,4 @@
-- Generated via bigquery_etl.glean_usage
--- convert array of key value pairs to a json object, cast numbers and booleans if possible
-CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>)
-RETURNS json AS (
- IF(
- ARRAY_LENGTH(input) = 0,
- NULL,
- JSON_OBJECT(
- ARRAY(SELECT key FROM UNNEST(input)),
- ARRAY(
- SELECT
- CASE
- WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL
- THEN TO_JSON(SAFE_CAST(value AS NUMERIC))
- WHEN SAFE_CAST(value AS BOOL) IS NOT NULL
- THEN TO_JSON(SAFE_CAST(value AS BOOL))
- ELSE TO_JSON(value)
- END
- FROM
- UNNEST(input)
- )
- )
- )
-);
-
--- convert array of key value pairs to a json object
--- values are nested structs and will be converted to json objects
-CREATE TEMP FUNCTION from_map_experiment(
- input ARRAY<
- STRUCT>>
- >
-)
-RETURNS json AS (
- IF(
- ARRAY_LENGTH(input) = 0,
- NULL,
- JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input)))
- )
-);
-
-CREATE TEMP FUNCTION metrics_to_json(metrics JSON)
-RETURNS JSON AS (
- JSON_STRIP_NULLS(
- JSON_REMOVE(
- -- labeled_* are the only ones that SHOULD show up as context for events pings,
- -- thus we special-case them
- --
- -- The JSON_SET/JSON_EXTRACT shenanigans are needed
- -- because those subfields might not exist, so accessing the columns would fail.
- -- but accessing non-existent fields in a JSON object simply gives us NULL.
- JSON_SET(
- metrics,
- '$.labeled_counter',
- mozfun.json.from_nested_map(metrics.labeled_counter),
- '$.labeled_string',
- mozfun.json.from_nested_map(metrics.labeled_string),
- '$.labeled_boolean',
- mozfun.json.from_nested_map(metrics.labeled_boolean),
- '$.url',
- metrics.url2
- ),
- '$.url2'
- ),
- remove_empty => TRUE
- )
-);
-
WITH base AS (
SELECT
* REPLACE (
@@ -92,11 +26,20 @@
ping_info.parsed_end_time,
ping_info.ping_type
) AS ping_info,
- metrics_to_json(TO_JSON(metrics)) AS metrics
+ TO_JSON(metrics) AS metrics
),
client_info.client_id AS client_id,
ping_info.reason AS reason,
- from_map_experiment(ping_info.experiments) AS experiments,
+ -- convert array of key value pairs to a json object
+ -- values are nested structs and will be converted to json objects
+ IF(
+ ARRAY_LENGTH(ping_info.experiments) = 0,
+ NULL,
+ JSON_OBJECT(
+ ARRAY(SELECT key FROM UNNEST(ping_info.experiments)),
+ ARRAY(SELECT value FROM UNNEST(ping_info.experiments))
+ )
+ ) AS experiments,
FROM
`accounts_backend.events`
WHERE
@@ -105,10 +48,40 @@
{% else %}
DATE(submission_timestamp) = @submission_date
{% endif %}
+),
+json_metrics AS (
+ SELECT
+ * REPLACE (
+ JSON_STRIP_NULLS(
+ JSON_REMOVE(
+ -- labeled_* are the only ones that SHOULD show up as context for events pings,
+ -- thus we special-case them
+ --
+ -- The JSON_SET/JSON_EXTRACT shenanigans are needed
+ -- because those subfields might not exist, so accessing the columns would fail.
+ -- but accessing non-existent fields in a JSON object simply gives us NULL.
+ JSON_SET(
+ metrics,
+ '$.labeled_counter',
+ mozfun.json.from_nested_map(metrics.labeled_counter),
+ '$.labeled_string',
+ mozfun.json.from_nested_map(metrics.labeled_string),
+ '$.labeled_boolean',
+ mozfun.json.from_nested_map(metrics.labeled_boolean),
+ '$.url',
+ metrics.url2
+ ),
+ '$.url2'
+ ),
+ remove_empty => TRUE
+ ) AS metrics
+ )
+ FROM
+ base
)
--
SELECT
- base.* EXCEPT (events),
+ json_metrics.* EXCEPT (events),
COALESCE(
SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)),
SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND)
@@ -116,8 +89,27 @@
event.category AS event_category,
event.name AS event_name,
ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better
- from_map_event_extra(event.extra) AS event_extra,
+ -- convert array of key value pairs to a json object, cast numbers and booleans if possible
+ IF(
+ ARRAY_LENGTH(event.extra) = 0,
+ NULL,
+ JSON_OBJECT(
+ ARRAY(SELECT key FROM UNNEST(event.extra)),
+ ARRAY(
+ SELECT
+ CASE
+ WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL
+ THEN TO_JSON(SAFE_CAST(value AS NUMERIC))
+ WHEN SAFE_CAST(value AS BOOL) IS NOT NULL
+ THEN TO_JSON(SAFE_CAST(value AS BOOL))
+ ELSE TO_JSON(value)
+ END
+ FROM
+ UNNEST(event.extra)
+ )
+ )
+ ) AS event_extra,
FROM
- base
+ json_metrics
CROSS JOIN
UNNEST(events) AS event
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/events_stream_v1/query.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/events_stream_v1/query.sql 2024-05-27 22:37:40.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/events_stream_v1/query.sql 2024-05-27 22:40:49.000000000 +0000
@@ -1,70 +1,4 @@
-- Generated via bigquery_etl.glean_usage
--- convert array of key value pairs to a json object, cast numbers and booleans if possible
-CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>)
-RETURNS json AS (
- IF(
- ARRAY_LENGTH(input) = 0,
- NULL,
- JSON_OBJECT(
- ARRAY(SELECT key FROM UNNEST(input)),
- ARRAY(
- SELECT
- CASE
- WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL
- THEN TO_JSON(SAFE_CAST(value AS NUMERIC))
- WHEN SAFE_CAST(value AS BOOL) IS NOT NULL
- THEN TO_JSON(SAFE_CAST(value AS BOOL))
- ELSE TO_JSON(value)
- END
- FROM
- UNNEST(input)
- )
- )
- )
-);
-
--- convert array of key value pairs to a json object
--- values are nested structs and will be converted to json objects
-CREATE TEMP FUNCTION from_map_experiment(
- input ARRAY<
- STRUCT>>
- >
-)
-RETURNS json AS (
- IF(
- ARRAY_LENGTH(input) = 0,
- NULL,
- JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input)))
- )
-);
-
-CREATE TEMP FUNCTION metrics_to_json(metrics JSON)
-RETURNS JSON AS (
- JSON_STRIP_NULLS(
- JSON_REMOVE(
- -- labeled_* are the only ones that SHOULD show up as context for events pings,
- -- thus we special-case them
- --
- -- The JSON_SET/JSON_EXTRACT shenanigans are needed
- -- because those subfields might not exist, so accessing the columns would fail.
- -- but accessing non-existent fields in a JSON object simply gives us NULL.
- JSON_SET(
- metrics,
- '$.labeled_counter',
- mozfun.json.from_nested_map(metrics.labeled_counter),
- '$.labeled_string',
- mozfun.json.from_nested_map(metrics.labeled_string),
- '$.labeled_boolean',
- mozfun.json.from_nested_map(metrics.labeled_boolean),
- '$.url',
- metrics.url2
- ),
- '$.url2'
- ),
- remove_empty => TRUE
- )
-);
-
WITH base AS (
SELECT
* REPLACE (
@@ -92,11 +26,20 @@
ping_info.parsed_end_time,
ping_info.ping_type
) AS ping_info,
- metrics_to_json(TO_JSON(metrics)) AS metrics
+ TO_JSON(metrics) AS metrics
),
client_info.client_id AS client_id,
ping_info.reason AS reason,
- from_map_experiment(ping_info.experiments) AS experiments,
+ -- convert array of key value pairs to a json object
+ -- values are nested structs and will be converted to json objects
+ IF(
+ ARRAY_LENGTH(ping_info.experiments) = 0,
+ NULL,
+ JSON_OBJECT(
+ ARRAY(SELECT key FROM UNNEST(ping_info.experiments)),
+ ARRAY(SELECT value FROM UNNEST(ping_info.experiments))
+ )
+ ) AS experiments,
FROM
`accounts_cirrus.events`
WHERE
@@ -105,10 +48,40 @@
{% else %}
DATE(submission_timestamp) = @submission_date
{% endif %}
+),
+json_metrics AS (
+ SELECT
+ * REPLACE (
+ JSON_STRIP_NULLS(
+ JSON_REMOVE(
+ -- labeled_* are the only ones that SHOULD show up as context for events pings,
+ -- thus we special-case them
+ --
+ -- The JSON_SET/JSON_EXTRACT shenanigans are needed
+ -- because those subfields might not exist, so accessing the columns would fail.
+ -- but accessing non-existent fields in a JSON object simply gives us NULL.
+ JSON_SET(
+ metrics,
+ '$.labeled_counter',
+ mozfun.json.from_nested_map(metrics.labeled_counter),
+ '$.labeled_string',
+ mozfun.json.from_nested_map(metrics.labeled_string),
+ '$.labeled_boolean',
+ mozfun.json.from_nested_map(metrics.labeled_boolean),
+ '$.url',
+ metrics.url2
+ ),
+ '$.url2'
+ ),
+ remove_empty => TRUE
+ ) AS metrics
+ )
+ FROM
+ base
)
--
SELECT
- base.* EXCEPT (events),
+ json_metrics.* EXCEPT (events),
COALESCE(
SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)),
SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND)
@@ -116,8 +89,27 @@
event.category AS event_category,
event.name AS event_name,
ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better
- from_map_event_extra(event.extra) AS event_extra,
+ -- convert array of key value pairs to a json object, cast numbers and booleans if possible
+ IF(
+ ARRAY_LENGTH(event.extra) = 0,
+ NULL,
+ JSON_OBJECT(
+ ARRAY(SELECT key FROM UNNEST(event.extra)),
+ ARRAY(
+ SELECT
+ CASE
+ WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL
+ THEN TO_JSON(SAFE_CAST(value AS NUMERIC))
+ WHEN SAFE_CAST(value AS BOOL) IS NOT NULL
+ THEN TO_JSON(SAFE_CAST(value AS BOOL))
+ ELSE TO_JSON(value)
+ END
+ FROM
+ UNNEST(event.extra)
+ )
+ )
+ ) AS event_extra,
FROM
- base
+ json_metrics
CROSS JOIN
UNNEST(events) AS event
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_frontend_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_frontend_derived/events_stream_v1/query.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_frontend_derived/events_stream_v1/query.sql 2024-05-27 22:37:40.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_frontend_derived/events_stream_v1/query.sql 2024-05-27 22:40:50.000000000 +0000
@@ -1,70 +1,4 @@
-- Generated via bigquery_etl.glean_usage
--- convert array of key value pairs to a json object, cast numbers and booleans if possible
-CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>)
-RETURNS json AS (
- IF(
- ARRAY_LENGTH(input) = 0,
- NULL,
- JSON_OBJECT(
- ARRAY(SELECT key FROM UNNEST(input)),
- ARRAY(
- SELECT
- CASE
- WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL
- THEN TO_JSON(SAFE_CAST(value AS NUMERIC))
- WHEN SAFE_CAST(value AS BOOL) IS NOT NULL
- THEN TO_JSON(SAFE_CAST(value AS BOOL))
- ELSE TO_JSON(value)
- END
- FROM
- UNNEST(input)
- )
- )
- )
-);
-
--- convert array of key value pairs to a json object
--- values are nested structs and will be converted to json objects
-CREATE TEMP FUNCTION from_map_experiment(
- input ARRAY<
- STRUCT>>
- >
-)
-RETURNS json AS (
- IF(
- ARRAY_LENGTH(input) = 0,
- NULL,
- JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input)))
- )
-);
-
-CREATE TEMP FUNCTION metrics_to_json(metrics JSON)
-RETURNS JSON AS (
- JSON_STRIP_NULLS(
- JSON_REMOVE(
- -- labeled_* are the only ones that SHOULD show up as context for events pings,
- -- thus we special-case them
- --
- -- The JSON_SET/JSON_EXTRACT shenanigans are needed
- -- because those subfields might not exist, so accessing the columns would fail.
- -- but accessing non-existent fields in a JSON object simply gives us NULL.
- JSON_SET(
- metrics,
- '$.labeled_counter',
- mozfun.json.from_nested_map(metrics.labeled_counter),
- '$.labeled_string',
- mozfun.json.from_nested_map(metrics.labeled_string),
- '$.labeled_boolean',
- mozfun.json.from_nested_map(metrics.labeled_boolean),
- '$.url',
- metrics.url2
- ),
- '$.url2'
- ),
- remove_empty => TRUE
- )
-);
-
WITH base AS (
SELECT
* REPLACE (
@@ -92,11 +26,20 @@
ping_info.parsed_end_time,
ping_info.ping_type
) AS ping_info,
- metrics_to_json(TO_JSON(metrics)) AS metrics
+ TO_JSON(metrics) AS metrics
),
client_info.client_id AS client_id,
ping_info.reason AS reason,
- from_map_experiment(ping_info.experiments) AS experiments,
+ -- convert array of key value pairs to a json object
+ -- values are nested structs and will be converted to json objects
+ IF(
+ ARRAY_LENGTH(ping_info.experiments) = 0,
+ NULL,
+ JSON_OBJECT(
+ ARRAY(SELECT key FROM UNNEST(ping_info.experiments)),
+ ARRAY(SELECT value FROM UNNEST(ping_info.experiments))
+ )
+ ) AS experiments,
FROM
`accounts_frontend.events`
WHERE
@@ -105,10 +48,40 @@
{% else %}
DATE(submission_timestamp) = @submission_date
{% endif %}
+),
+json_metrics AS (
+ SELECT
+ * REPLACE (
+ JSON_STRIP_NULLS(
+ JSON_REMOVE(
+ -- labeled_* are the only ones that SHOULD show up as context for events pings,
+ -- thus we special-case them
+ --
+ -- The JSON_SET/JSON_EXTRACT shenanigans are needed
+ -- because those subfields might not exist, so accessing the columns would fail.
+ -- but accessing non-existent fields in a JSON object simply gives us NULL.
+ JSON_SET(
+ metrics,
+ '$.labeled_counter',
+ mozfun.json.from_nested_map(metrics.labeled_counter),
+ '$.labeled_string',
+ mozfun.json.from_nested_map(metrics.labeled_string),
+ '$.labeled_boolean',
+ mozfun.json.from_nested_map(metrics.labeled_boolean),
+ '$.url',
+ metrics.url2
+ ),
+ '$.url2'
+ ),
+ remove_empty => TRUE
+ ) AS metrics
+ )
+ FROM
+ base
)
--
SELECT
- base.* EXCEPT (events),
+ json_metrics.* EXCEPT (events),
COALESCE(
SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)),
SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND)
@@ -116,8 +89,27 @@
event.category AS event_category,
event.name AS event_name,
ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better
- from_map_event_extra(event.extra) AS event_extra,
+ -- convert array of key value pairs to a json object, cast numbers and booleans if possible
+ IF(
+ ARRAY_LENGTH(event.extra) = 0,
+ NULL,
+ JSON_OBJECT(
+ ARRAY(SELECT key FROM UNNEST(event.extra)),
+ ARRAY(
+ SELECT
+ CASE
+ WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL
+ THEN TO_JSON(SAFE_CAST(value AS NUMERIC))
+ WHEN SAFE_CAST(value AS BOOL) IS NOT NULL
+ THEN TO_JSON(SAFE_CAST(value AS BOOL))
+ ELSE TO_JSON(value)
+ END
+ FROM
+ UNNEST(event.extra)
+ )
+ )
+ ) AS event_extra,
FROM
- base
+ json_metrics
CROSS JOIN
UNNEST(events) AS event
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/event_monitoring_live_v1/materialized_view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/event_monitoring_live_v1/materialized_view.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-27 22:37:40.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-27 22:40:47.000000000 +0000
@@ -50,7 +50,7 @@
client_info.app_display_version AS version,
ping_info
FROM
- `moz-fx-data-shared-prod.bedrock_live.interaction_v1`
+ `moz-fx-data-shared-prod.bedrock_live.non_interaction_v1`
UNION ALL
SELECT
submission_timestamp,
@@ -60,7 +60,7 @@
client_info.app_display_version AS version,
ping_info
FROM
- `moz-fx-data-shared-prod.bedrock_live.non_interaction_v1`
+ `moz-fx-data-shared-prod.bedrock_live.interaction_v1`
UNION ALL
SELECT
submission_timestamp,
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/events_stream_v1/query.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/events_stream_v1/query.sql 2024-05-27 22:37:40.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/events_stream_v1/query.sql 2024-05-27 22:40:51.000000000 +0000
@@ -1,70 +1,4 @@
-- Generated via bigquery_etl.glean_usage
--- convert array of key value pairs to a json object, cast numbers and booleans if possible
-CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>)
-RETURNS json AS (
- IF(
- ARRAY_LENGTH(input) = 0,
- NULL,
- JSON_OBJECT(
- ARRAY(SELECT key FROM UNNEST(input)),
- ARRAY(
- SELECT
- CASE
- WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL
- THEN TO_JSON(SAFE_CAST(value AS NUMERIC))
- WHEN SAFE_CAST(value AS BOOL) IS NOT NULL
- THEN TO_JSON(SAFE_CAST(value AS BOOL))
- ELSE TO_JSON(value)
- END
- FROM
- UNNEST(input)
- )
- )
- )
-);
-
--- convert array of key value pairs to a json object
--- values are nested structs and will be converted to json objects
-CREATE TEMP FUNCTION from_map_experiment(
- input ARRAY<
- STRUCT>>
- >
-)
-RETURNS json AS (
- IF(
- ARRAY_LENGTH(input) = 0,
- NULL,
- JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input)))
- )
-);
-
-CREATE TEMP FUNCTION metrics_to_json(metrics JSON)
-RETURNS JSON AS (
- JSON_STRIP_NULLS(
- JSON_REMOVE(
- -- labeled_* are the only ones that SHOULD show up as context for events pings,
- -- thus we special-case them
- --
- -- The JSON_SET/JSON_EXTRACT shenanigans are needed
- -- because those subfields might not exist, so accessing the columns would fail.
- -- but accessing non-existent fields in a JSON object simply gives us NULL.
- JSON_SET(
- metrics,
- '$.labeled_counter',
- mozfun.json.from_nested_map(metrics.labeled_counter),
- '$.labeled_string',
- mozfun.json.from_nested_map(metrics.labeled_string),
- '$.labeled_boolean',
- mozfun.json.from_nested_map(metrics.labeled_boolean),
- '$.url',
- metrics.url2
- ),
- '$.url2'
- ),
- remove_empty => TRUE
- )
-);
-
WITH base AS (
SELECT
* REPLACE (
@@ -92,11 +26,20 @@
ping_info.parsed_end_time,
ping_info.ping_type
) AS ping_info,
- metrics_to_json(TO_JSON(metrics)) AS metrics
+ TO_JSON(metrics) AS metrics
),
client_info.client_id AS client_id,
ping_info.reason AS reason,
- from_map_experiment(ping_info.experiments) AS experiments,
+ -- convert array of key value pairs to a json object
+ -- values are nested structs and will be converted to json objects
+ IF(
+ ARRAY_LENGTH(ping_info.experiments) = 0,
+ NULL,
+ JSON_OBJECT(
+ ARRAY(SELECT key FROM UNNEST(ping_info.experiments)),
+ ARRAY(SELECT value FROM UNNEST(ping_info.experiments))
+ )
+ ) AS experiments,
FROM
`bedrock.events`
WHERE
@@ -105,10 +48,40 @@
{% else %}
DATE(submission_timestamp) = @submission_date
{% endif %}
+),
+json_metrics AS (
+ SELECT
+ * REPLACE (
+ JSON_STRIP_NULLS(
+ JSON_REMOVE(
+ -- labeled_* are the only ones that SHOULD show up as context for events pings,
+ -- thus we special-case them
+ --
+ -- The JSON_SET/JSON_EXTRACT shenanigans are needed
+ -- because those subfields might not exist, so accessing the columns would fail.
+ -- but accessing non-existent fields in a JSON object simply gives us NULL.
+ JSON_SET(
+ metrics,
+ '$.labeled_counter',
+ mozfun.json.from_nested_map(metrics.labeled_counter),
+ '$.labeled_string',
+ mozfun.json.from_nested_map(metrics.labeled_string),
+ '$.labeled_boolean',
+ mozfun.json.from_nested_map(metrics.labeled_boolean),
+ '$.url',
+ metrics.url2
+ ),
+ '$.url2'
+ ),
+ remove_empty => TRUE
+ ) AS metrics
+ )
+ FROM
+ base
)
--
SELECT
- base.* EXCEPT (events),
+ json_metrics.* EXCEPT (events),
COALESCE(
SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)),
SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND)
@@ -116,8 +89,27 @@
event.category AS event_category,
event.name AS event_name,
ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better
- from_map_event_extra(event.extra) AS event_extra,
+ -- convert array of key value pairs to a json object, cast numbers and booleans if possible
+ IF(
+ ARRAY_LENGTH(event.extra) = 0,
+ NULL,
+ JSON_OBJECT(
+ ARRAY(SELECT key FROM UNNEST(event.extra)),
+ ARRAY(
+ SELECT
+ CASE
+ WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL
+ THEN TO_JSON(SAFE_CAST(value AS NUMERIC))
+ WHEN SAFE_CAST(value AS BOOL) IS NOT NULL
+ THEN TO_JSON(SAFE_CAST(value AS BOOL))
+ ELSE TO_JSON(value)
+ END
+ FROM
+ UNNEST(event.extra)
+ )
+ )
+ ) AS event_extra,
FROM
- base
+ json_metrics
CROSS JOIN
UNNEST(events) AS event
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/events_stream_v1/query.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/events_stream_v1/query.sql 2024-05-27 22:37:40.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/events_stream_v1/query.sql 2024-05-27 22:40:49.000000000 +0000
@@ -1,70 +1,4 @@
-- Generated via bigquery_etl.glean_usage
--- convert array of key value pairs to a json object, cast numbers and booleans if possible
-CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>)
-RETURNS json AS (
- IF(
- ARRAY_LENGTH(input) = 0,
- NULL,
- JSON_OBJECT(
- ARRAY(SELECT key FROM UNNEST(input)),
- ARRAY(
- SELECT
- CASE
- WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL
- THEN TO_JSON(SAFE_CAST(value AS NUMERIC))
- WHEN SAFE_CAST(value AS BOOL) IS NOT NULL
- THEN TO_JSON(SAFE_CAST(value AS BOOL))
- ELSE TO_JSON(value)
- END
- FROM
- UNNEST(input)
- )
- )
- )
-);
-
--- convert array of key value pairs to a json object
--- values are nested structs and will be converted to json objects
-CREATE TEMP FUNCTION from_map_experiment(
- input ARRAY<
- STRUCT>>
- >
-)
-RETURNS json AS (
- IF(
- ARRAY_LENGTH(input) = 0,
- NULL,
- JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input)))
- )
-);
-
-CREATE TEMP FUNCTION metrics_to_json(metrics JSON)
-RETURNS JSON AS (
- JSON_STRIP_NULLS(
- JSON_REMOVE(
- -- labeled_* are the only ones that SHOULD show up as context for events pings,
- -- thus we special-case them
- --
- -- The JSON_SET/JSON_EXTRACT shenanigans are needed
- -- because those subfields might not exist, so accessing the columns would fail.
- -- but accessing non-existent fields in a JSON object simply gives us NULL.
- JSON_SET(
- metrics,
- '$.labeled_counter',
- mozfun.json.from_nested_map(metrics.labeled_counter),
- '$.labeled_string',
- mozfun.json.from_nested_map(metrics.labeled_string),
- '$.labeled_boolean',
- mozfun.json.from_nested_map(metrics.labeled_boolean),
- '$.url',
- metrics.url2
- ),
- '$.url2'
- ),
- remove_empty => TRUE
- )
-);
-
WITH base AS (
SELECT
* REPLACE (
@@ -92,11 +26,20 @@
ping_info.parsed_end_time,
ping_info.ping_type
) AS ping_info,
- metrics_to_json(TO_JSON(metrics)) AS metrics
+ TO_JSON(metrics) AS metrics
),
client_info.client_id AS client_id,
ping_info.reason AS reason,
- from_map_experiment(ping_info.experiments) AS experiments,
+ -- convert array of key value pairs to a json object
+ -- values are nested structs and will be converted to json objects
+ IF(
+ ARRAY_LENGTH(ping_info.experiments) = 0,
+ NULL,
+ JSON_OBJECT(
+ ARRAY(SELECT key FROM UNNEST(ping_info.experiments)),
+ ARRAY(SELECT value FROM UNNEST(ping_info.experiments))
+ )
+ ) AS experiments,
FROM
`burnham.events`
WHERE
@@ -105,10 +48,40 @@
{% else %}
DATE(submission_timestamp) = @submission_date
{% endif %}
+),
+json_metrics AS (
+ SELECT
+ * REPLACE (
+ JSON_STRIP_NULLS(
+ JSON_REMOVE(
+ -- labeled_* are the only ones that SHOULD show up as context for events pings,
+ -- thus we special-case them
+ --
+ -- The JSON_SET/JSON_EXTRACT shenanigans are needed
+ -- because those subfields might not exist, so accessing the columns would fail.
+ -- but accessing non-existent fields in a JSON object simply gives us NULL.
+ JSON_SET(
+ metrics,
+ '$.labeled_counter',
+ mozfun.json.from_nested_map(metrics.labeled_counter),
+ '$.labeled_string',
+ mozfun.json.from_nested_map(metrics.labeled_string),
+ '$.labeled_boolean',
+ mozfun.json.from_nested_map(metrics.labeled_boolean),
+ '$.url',
+ metrics.url2
+ ),
+ '$.url2'
+ ),
+ remove_empty => TRUE
+ ) AS metrics
+ )
+ FROM
+ base
)
--
SELECT
- base.* EXCEPT (events),
+ json_metrics.* EXCEPT (events),
COALESCE(
SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)),
SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND)
@@ -116,8 +89,27 @@
event.category AS event_category,
event.name AS event_name,
ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better
- from_map_event_extra(event.extra) AS event_extra,
+ -- convert array of key value pairs to a json object, cast numbers and booleans if possible
+ IF(
+ ARRAY_LENGTH(event.extra) = 0,
+ NULL,
+ JSON_OBJECT(
+ ARRAY(SELECT key FROM UNNEST(event.extra)),
+ ARRAY(
+ SELECT
+ CASE
+ WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL
+ THEN TO_JSON(SAFE_CAST(value AS NUMERIC))
+ WHEN SAFE_CAST(value AS BOOL) IS NOT NULL
+ THEN TO_JSON(SAFE_CAST(value AS BOOL))
+ ELSE TO_JSON(value)
+ END
+ FROM
+ UNNEST(event.extra)
+ )
+ )
+ ) AS event_extra,
FROM
- base
+ json_metrics
CROSS JOIN
UNNEST(events) AS event
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml 2024-05-27 22:37:05.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml 2024-05-27 22:47:02.000000000 +0000
@@ -1,49 +1,49 @@
fields:
-- mode: NULLABLE
- name: submission_date
+- name: submission_date
type: DATE
-- mode: NULLABLE
- name: source
+ mode: NULLABLE
+- name: source
type: STRING
-- mode: NULLABLE
- name: event_type
+ mode: NULLABLE
+- name: event_type
type: STRING
-- mode: NULLABLE
- name: form_factor
+ mode: NULLABLE
+- name: form_factor
type: STRING
-- mode: NULLABLE
- name: country
+ mode: NULLABLE
+- name: country
type: STRING
-- mode: NULLABLE
- name: subdivision1
+ mode: NULLABLE
+- name: subdivision1
type: STRING
-- mode: NULLABLE
- name: advertiser
+ mode: NULLABLE
+- name: advertiser
type: STRING
-- mode: NULLABLE
- name: release_channel
+ mode: NULLABLE
+- name: release_channel
type: STRING
-- mode: NULLABLE
- name: position
+ mode: NULLABLE
+- name: position
type: INTEGER
-- mode: NULLABLE
- name: provider
+ mode: NULLABLE
+- name: provider
type: STRING
-- mode: NULLABLE
- name: match_type
+ mode: NULLABLE
+- name: match_type
type: STRING
-- mode: NULLABLE
- name: normalized_os
+ mode: NULLABLE
+- name: normalized_os
type: STRING
-- mode: NULLABLE
- name: suggest_data_sharing_enabled
+ mode: NULLABLE
+- name: suggest_data_sharing_enabled
type: BOOLEAN
-- mode: NULLABLE
- name: event_count
+ mode: NULLABLE
+- name: event_count
type: INTEGER
-- mode: NULLABLE
- name: user_count
+ mode: NULLABLE
+- name: user_count
type: INTEGER
-- mode: NULLABLE
- name: query_type
+ mode: NULLABLE
+- name: query_type
type: STRING
+ mode: NULLABLE
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml 2024-05-27 22:37:05.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml 2024-05-27 22:47:02.000000000 +0000
@@ -1,40 +1,40 @@
fields:
-- mode: NULLABLE
- name: submission_date
+- name: submission_date
type: DATE
-- mode: NULLABLE
- name: form_factor
+ mode: NULLABLE
+- name: form_factor
type: STRING
-- mode: NULLABLE
- name: country
+ mode: NULLABLE
+- name: country
type: STRING
-- mode: NULLABLE
- name: advertiser
+ mode: NULLABLE
+- name: advertiser
type: STRING
-- mode: NULLABLE
- name: normalized_os
+ mode: NULLABLE
+- name: normalized_os
type: STRING
-- mode: NULLABLE
- name: release_channel
+ mode: NULLABLE
+- name: release_channel
type: STRING
-- mode: NULLABLE
- name: position
+ mode: NULLABLE
+- name: position
type: INTEGER
-- mode: NULLABLE
- name: provider
+ mode: NULLABLE
+- name: provider
type: STRING
-- mode: NULLABLE
- name: match_type
+ mode: NULLABLE
+- name: match_type
type: STRING
-- mode: NULLABLE
- name: suggest_data_sharing_enabled
+ mode: NULLABLE
+- name: suggest_data_sharing_enabled
type: BOOLEAN
-- mode: NULLABLE
- name: impression_count
+ mode: NULLABLE
+- name: impression_count
type: INTEGER
-- mode: NULLABLE
- name: click_count
+ mode: NULLABLE
+- name: click_count
type: INTEGER
-- mode: NULLABLE
- name: query_type
+ mode: NULLABLE
+- name: query_type
type: STRING
+ mode: NULLABLE
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/debug_ping_view_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/debug_ping_view_derived/events_stream_v1/query.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/debug_ping_view_derived/events_stream_v1/query.sql 2024-05-27 22:37:41.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/debug_ping_view_derived/events_stream_v1/query.sql 2024-05-27 22:40:50.000000000 +0000
@@ -1,70 +1,4 @@
-- Generated via bigquery_etl.glean_usage
--- convert array of key value pairs to a json object, cast numbers and booleans if possible
-CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>)
-RETURNS json AS (
- IF(
- ARRAY_LENGTH(input) = 0,
- NULL,
- JSON_OBJECT(
- ARRAY(SELECT key FROM UNNEST(input)),
- ARRAY(
- SELECT
- CASE
- WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL
- THEN TO_JSON(SAFE_CAST(value AS NUMERIC))
- WHEN SAFE_CAST(value AS BOOL) IS NOT NULL
- THEN TO_JSON(SAFE_CAST(value AS BOOL))
- ELSE TO_JSON(value)
- END
- FROM
- UNNEST(input)
- )
- )
- )
-);
-
--- convert array of key value pairs to a json object
--- values are nested structs and will be converted to json objects
-CREATE TEMP FUNCTION from_map_experiment(
- input ARRAY<
- STRUCT>>
- >
-)
-RETURNS json AS (
- IF(
- ARRAY_LENGTH(input) = 0,
- NULL,
- JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input)))
- )
-);
-
-CREATE TEMP FUNCTION metrics_to_json(metrics JSON)
-RETURNS JSON AS (
- JSON_STRIP_NULLS(
- JSON_REMOVE(
- -- labeled_* are the only ones that SHOULD show up as context for events pings,
- -- thus we special-case them
- --
- -- The JSON_SET/JSON_EXTRACT shenanigans are needed
- -- because those subfields might not exist, so accessing the columns would fail.
- -- but accessing non-existent fields in a JSON object simply gives us NULL.
- JSON_SET(
- metrics,
- '$.labeled_counter',
- mozfun.json.from_nested_map(metrics.labeled_counter),
- '$.labeled_string',
- mozfun.json.from_nested_map(metrics.labeled_string),
- '$.labeled_boolean',
- mozfun.json.from_nested_map(metrics.labeled_boolean),
- '$.url',
- metrics.url2
- ),
- '$.url2'
- ),
- remove_empty => TRUE
- )
-);
-
WITH base AS (
SELECT
* REPLACE (
@@ -92,11 +26,20 @@
ping_info.parsed_end_time,
ping_info.ping_type
) AS ping_info,
- metrics_to_json(TO_JSON(metrics)) AS metrics
+ TO_JSON(metrics) AS metrics
),
client_info.client_id AS client_id,
ping_info.reason AS reason,
- from_map_experiment(ping_info.experiments) AS experiments,
+ -- convert array of key value pairs to a json object
+ -- values are nested structs and will be converted to json objects
+ IF(
+ ARRAY_LENGTH(ping_info.experiments) = 0,
+ NULL,
+ JSON_OBJECT(
+ ARRAY(SELECT key FROM UNNEST(ping_info.experiments)),
+ ARRAY(SELECT value FROM UNNEST(ping_info.experiments))
+ )
+ ) AS experiments,
FROM
`debug_ping_view.events`
WHERE
@@ -105,10 +48,40 @@
{% else %}
DATE(submission_timestamp) = @submission_date
{% endif %}
+),
+json_metrics AS (
+ SELECT
+ * REPLACE (
+ JSON_STRIP_NULLS(
+ JSON_REMOVE(
+ -- labeled_* are the only ones that SHOULD show up as context for events pings,
+ -- thus we special-case them
+ --
+ -- The JSON_SET/JSON_EXTRACT shenanigans are needed
+ -- because those subfields might not exist, so accessing the columns would fail.
+ -- but accessing non-existent fields in a JSON object simply gives us NULL.
+ JSON_SET(
+ metrics,
+ '$.labeled_counter',
+ mozfun.json.from_nested_map(metrics.labeled_counter),
+ '$.labeled_string',
+ mozfun.json.from_nested_map(metrics.labeled_string),
+ '$.labeled_boolean',
+ mozfun.json.from_nested_map(metrics.labeled_boolean),
+ '$.url',
+ metrics.url2
+ ),
+ '$.url2'
+ ),
+ remove_empty => TRUE
+ ) AS metrics
+ )
+ FROM
+ base
)
--
SELECT
- base.* EXCEPT (events),
+ json_metrics.* EXCEPT (events),
COALESCE(
SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)),
SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND)
@@ -116,8 +89,27 @@
event.category AS event_category,
event.name AS event_name,
ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better
- from_map_event_extra(event.extra) AS event_extra,
+ -- convert array of key value pairs to a json object, cast numbers and booleans if possible
+ IF(
+ ARRAY_LENGTH(event.extra) = 0,
+ NULL,
+ JSON_OBJECT(
+ ARRAY(SELECT key FROM UNNEST(event.extra)),
+ ARRAY(
+ SELECT
+ CASE
+ WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL
+ THEN TO_JSON(SAFE_CAST(value AS NUMERIC))
+ WHEN SAFE_CAST(value AS BOOL) IS NOT NULL
+ THEN TO_JSON(SAFE_CAST(value AS BOOL))
+ ELSE TO_JSON(value)
+ END
+ FROM
+ UNNEST(event.extra)
+ )
+ )
+ ) AS event_extra,
FROM
- base
+ json_metrics
CROSS JOIN
UNNEST(events) AS event
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml 2024-05-27 22:37:05.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml 2024-05-27 22:45:55.000000000 +0000
@@ -26,6 +26,9 @@
- name: adjust_network
type: STRING
mode: NULLABLE
+- name: install_source
+ type: STRING
+ mode: NULLABLE
- name: retained_week_2
type: BOOLEAN
mode: NULLABLE
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml 2024-05-27 22:37:05.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml 2024-05-27 22:45:54.000000000 +0000
@@ -48,6 +48,10 @@
description: 'The type of source of a client installation.
'
+- name: install_source
+ type: STRING
+ mode: NULLABLE
+ description: null
- name: new_profiles
type: INTEGER
mode: NULLABLE
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/events_stream_v1/query.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/events_stream_v1/query.sql 2024-05-27 22:37:40.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/events_stream_v1/query.sql 2024-05-27 22:40:51.000000000 +0000
@@ -1,70 +1,4 @@
-- Generated via bigquery_etl.glean_usage
--- convert array of key value pairs to a json object, cast numbers and booleans if possible
-CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>)
-RETURNS json AS (
- IF(
- ARRAY_LENGTH(input) = 0,
- NULL,
- JSON_OBJECT(
- ARRAY(SELECT key FROM UNNEST(input)),
- ARRAY(
- SELECT
- CASE
- WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL
- THEN TO_JSON(SAFE_CAST(value AS NUMERIC))
- WHEN SAFE_CAST(value AS BOOL) IS NOT NULL
- THEN TO_JSON(SAFE_CAST(value AS BOOL))
- ELSE TO_JSON(value)
- END
- FROM
- UNNEST(input)
- )
- )
- )
-);
-
--- convert array of key value pairs to a json object
--- values are nested structs and will be converted to json objects
-CREATE TEMP FUNCTION from_map_experiment(
- input ARRAY<
- STRUCT>>
- >
-)
-RETURNS json AS (
- IF(
- ARRAY_LENGTH(input) = 0,
- NULL,
- JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input)))
- )
-);
-
-CREATE TEMP FUNCTION metrics_to_json(metrics JSON)
-RETURNS JSON AS (
- JSON_STRIP_NULLS(
- JSON_REMOVE(
- -- labeled_* are the only ones that SHOULD show up as context for events pings,
- -- thus we special-case them
- --
- -- The JSON_SET/JSON_EXTRACT shenanigans are needed
- -- because those subfields might not exist, so accessing the columns would fail.
- -- but accessing non-existent fields in a JSON object simply gives us NULL.
- JSON_SET(
- metrics,
- '$.labeled_counter',
- mozfun.json.from_nested_map(metrics.labeled_counter),
- '$.labeled_string',
- mozfun.json.from_nested_map(metrics.labeled_string),
- '$.labeled_boolean',
- mozfun.json.from_nested_map(metrics.labeled_boolean),
- '$.url',
- metrics.url2
- ),
- '$.url2'
- ),
- remove_empty => TRUE
- )
-);
-
WITH base AS (
SELECT
* REPLACE (
@@ -92,11 +26,20 @@
ping_info.parsed_end_time,
ping_info.ping_type
) AS ping_info,
- metrics_to_json(TO_JSON(metrics)) AS metrics
+ TO_JSON(metrics) AS metrics
),
client_info.client_id AS client_id,
ping_info.reason AS reason,
- from_map_experiment(ping_info.experiments) AS experiments,
+ -- convert array of key value pairs to a json object
+ -- values are nested structs and will be converted to json objects
+ IF(
+ ARRAY_LENGTH(ping_info.experiments) = 0,
+ NULL,
+ JSON_OBJECT(
+ ARRAY(SELECT key FROM UNNEST(ping_info.experiments)),
+ ARRAY(SELECT value FROM UNNEST(ping_info.experiments))
+ )
+ ) AS experiments,
FROM
`firefox_desktop_background_defaultagent.events`
WHERE
@@ -105,10 +48,40 @@
{% else %}
DATE(submission_timestamp) = @submission_date
{% endif %}
+),
+json_metrics AS (
+ SELECT
+ * REPLACE (
+ JSON_STRIP_NULLS(
+ JSON_REMOVE(
+ -- labeled_* are the only ones that SHOULD show up as context for events pings,
+ -- thus we special-case them
+ --
+ -- The JSON_SET/JSON_EXTRACT shenanigans are needed
+ -- because those subfields might not exist, so accessing the columns would fail.
+ -- but accessing non-existent fields in a JSON object simply gives us NULL.
+ JSON_SET(
+ metrics,
+ '$.labeled_counter',
+ mozfun.json.from_nested_map(metrics.labeled_counter),
+ '$.labeled_string',
+ mozfun.json.from_nested_map(metrics.labeled_string),
+ '$.labeled_boolean',
+ mozfun.json.from_nested_map(metrics.labeled_boolean),
+ '$.url',
+ metrics.url2
+ ),
+ '$.url2'
+ ),
+ remove_empty => TRUE
+ ) AS metrics
+ )
+ FROM
+ base
)
--
SELECT
- base.* EXCEPT (events),
+ json_metrics.* EXCEPT (events),
COALESCE(
SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)),
SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND)
@@ -116,8 +89,27 @@
event.category AS event_category,
event.name AS event_name,
ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better
- from_map_event_extra(event.extra) AS event_extra,
+ -- convert array of key value pairs to a json object, cast numbers and booleans if possible
+ IF(
+ ARRAY_LENGTH(event.extra) = 0,
+ NULL,
+ JSON_OBJECT(
+ ARRAY(SELECT key FROM UNNEST(event.extra)),
+ ARRAY(
+ SELECT
+ CASE
+ WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL
+ THEN TO_JSON(SAFE_CAST(value AS NUMERIC))
+ WHEN SAFE_CAST(value AS BOOL) IS NOT NULL
+ THEN TO_JSON(SAFE_CAST(value AS BOOL))
+ ELSE TO_JSON(value)
+ END
+ FROM
+ UNNEST(event.extra)
+ )
+ )
+ ) AS event_extra,
FROM
- base
+ json_metrics
CROSS JOIN
UNNEST(events) AS event
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/events_stream_v1/query.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/events_stream_v1/query.sql 2024-05-27 22:37:40.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/events_stream_v1/query.sql 2024-05-27 22:40:52.000000000 +0000
@@ -1,70 +1,4 @@
-- Generated via bigquery_etl.glean_usage
--- convert array of key value pairs to a json object, cast numbers and booleans if possible
-CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>)
-RETURNS json AS (
- IF(
- ARRAY_LENGTH(input) = 0,
- NULL,
- JSON_OBJECT(
- ARRAY(SELECT key FROM UNNEST(input)),
- ARRAY(
- SELECT
- CASE
- WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL
- THEN TO_JSON(SAFE_CAST(value AS NUMERIC))
- WHEN SAFE_CAST(value AS BOOL) IS NOT NULL
- THEN TO_JSON(SAFE_CAST(value AS BOOL))
- ELSE TO_JSON(value)
- END
- FROM
- UNNEST(input)
- )
- )
- )
-);
-
--- convert array of key value pairs to a json object
--- values are nested structs and will be converted to json objects
-CREATE TEMP FUNCTION from_map_experiment(
- input ARRAY<
- STRUCT>>
- >
-)
-RETURNS json AS (
- IF(
- ARRAY_LENGTH(input) = 0,
- NULL,
- JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input)))
- )
-);
-
-CREATE TEMP FUNCTION metrics_to_json(metrics JSON)
-RETURNS JSON AS (
- JSON_STRIP_NULLS(
- JSON_REMOVE(
- -- labeled_* are the only ones that SHOULD show up as context for events pings,
- -- thus we special-case them
- --
- -- The JSON_SET/JSON_EXTRACT shenanigans are needed
- -- because those subfields might not exist, so accessing the columns would fail.
- -- but accessing non-existent fields in a JSON object simply gives us NULL.
- JSON_SET(
- metrics,
- '$.labeled_counter',
- mozfun.json.from_nested_map(metrics.labeled_counter),
- '$.labeled_string',
- mozfun.json.from_nested_map(metrics.labeled_string),
- '$.labeled_boolean',
- mozfun.json.from_nested_map(metrics.labeled_boolean),
- '$.url',
- metrics.url2
- ),
- '$.url2'
- ),
- remove_empty => TRUE
- )
-);
-
WITH base AS (
SELECT
* REPLACE (
@@ -92,11 +26,20 @@
ping_info.parsed_end_time,
ping_info.ping_type
) AS ping_info,
- metrics_to_json(TO_JSON(metrics)) AS metrics
+ TO_JSON(metrics) AS metrics
),
client_info.client_id AS client_id,
ping_info.reason AS reason,
- from_map_experiment(ping_info.experiments) AS experiments,
+ -- convert array of key value pairs to a json object
+ -- values are nested structs and will be converted to json objects
+ IF(
+ ARRAY_LENGTH(ping_info.experiments) = 0,
+ NULL,
+ JSON_OBJECT(
+ ARRAY(SELECT key FROM UNNEST(ping_info.experiments)),
+ ARRAY(SELECT value FROM UNNEST(ping_info.experiments))
+ )
+ ) AS experiments,
FROM
`firefox_desktop_background_tasks.events`
WHERE
@@ -105,10 +48,40 @@
{% else %}
DATE(submission_timestamp) = @submission_date
{% endif %}
+),
+json_metrics AS (
+ SELECT
+ * REPLACE (
+ JSON_STRIP_NULLS(
+ JSON_REMOVE(
+ -- labeled_* are the only ones that SHOULD show up as context for events pings,
+ -- thus we special-case them
+ --
+ -- The JSON_SET/JSON_EXTRACT shenanigans are needed
+ -- because those subfields might not exist, so accessing the columns would fail.
+ -- but accessing non-existent fields in a JSON object simply gives us NULL.
+ JSON_SET(
+ metrics,
+ '$.labeled_counter',
+ mozfun.json.from_nested_map(metrics.labeled_counter),
+ '$.labeled_string',
+ mozfun.json.from_nested_map(metrics.labeled_string),
+ '$.labeled_boolean',
+ mozfun.json.from_nested_map(metrics.labeled_boolean),
+ '$.url',
+ metrics.url2
+ ),
+ '$.url2'
+ ),
+ remove_empty => TRUE
+ ) AS metrics
+ )
+ FROM
+ base
)
--
SELECT
- base.* EXCEPT (events),
+ json_metrics.* EXCEPT (events),
COALESCE(
SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)),
SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND)
@@ -116,8 +89,27 @@
event.category AS event_category,
event.name AS event_name,
ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better
- from_map_event_extra(event.extra) AS event_extra,
+ -- convert array of key value pairs to a json object, cast numbers and booleans if possible
+ IF(
+ ARRAY_LENGTH(event.extra) = 0,
+ NULL,
+ JSON_OBJECT(
+ ARRAY(SELECT key FROM UNNEST(event.extra)),
+ ARRAY(
+ SELECT
+ CASE
+ WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL
+ THEN TO_JSON(SAFE_CAST(value AS NUMERIC))
+ WHEN SAFE_CAST(value AS BOOL) IS NOT NULL
+ THEN TO_JSON(SAFE_CAST(value AS BOOL))
+ ELSE TO_JSON(value)
+ END
+ FROM
+ UNNEST(event.extra)
+ )
+ )
+ ) AS event_extra,
FROM
- base
+ json_metrics
CROSS JOIN
UNNEST(events) AS event
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/events_stream_v1/query.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/events_stream_v1/query.sql 2024-05-27 22:37:41.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/events_stream_v1/query.sql 2024-05-27 22:40:52.000000000 +0000
@@ -1,70 +1,4 @@
-- Generated via bigquery_etl.glean_usage
--- convert array of key value pairs to a json object, cast numbers and booleans if possible
-CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>)
-RETURNS json AS (
- IF(
- ARRAY_LENGTH(input) = 0,
- NULL,
- JSON_OBJECT(
- ARRAY(SELECT key FROM UNNEST(input)),
- ARRAY(
- SELECT
- CASE
- WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL
- THEN TO_JSON(SAFE_CAST(value AS NUMERIC))
- WHEN SAFE_CAST(value AS BOOL) IS NOT NULL
- THEN TO_JSON(SAFE_CAST(value AS BOOL))
- ELSE TO_JSON(value)
- END
- FROM
- UNNEST(input)
- )
- )
- )
-);
-
--- convert array of key value pairs to a json object
--- values are nested structs and will be converted to json objects
-CREATE TEMP FUNCTION from_map_experiment(
- input ARRAY<
- STRUCT>>
- >
-)
-RETURNS json AS (
- IF(
- ARRAY_LENGTH(input) = 0,
- NULL,
- JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input)))
- )
-);
-
-CREATE TEMP FUNCTION metrics_to_json(metrics JSON)
-RETURNS JSON AS (
- JSON_STRIP_NULLS(
- JSON_REMOVE(
- -- labeled_* are the only ones that SHOULD show up as context for events pings,
- -- thus we special-case them
- --
- -- The JSON_SET/JSON_EXTRACT shenanigans are needed
- -- because those subfields might not exist, so accessing the columns would fail.
- -- but accessing non-existent fields in a JSON object simply gives us NULL.
- JSON_SET(
- metrics,
- '$.labeled_counter',
- mozfun.json.from_nested_map(metrics.labeled_counter),
- '$.labeled_string',
- mozfun.json.from_nested_map(metrics.labeled_string),
- '$.labeled_boolean',
- mozfun.json.from_nested_map(metrics.labeled_boolean),
- '$.url',
- metrics.url2
- ),
- '$.url2'
- ),
- remove_empty => TRUE
- )
-);
-
WITH base AS (
SELECT
* REPLACE (
@@ -92,11 +26,20 @@
ping_info.parsed_end_time,
ping_info.ping_type
) AS ping_info,
- metrics_to_json(TO_JSON(metrics)) AS metrics
+ TO_JSON(metrics) AS metrics
),
client_info.client_id AS client_id,
ping_info.reason AS reason,
- from_map_experiment(ping_info.experiments) AS experiments,
+ -- convert array of key value pairs to a json object
+ -- values are nested structs and will be converted to json objects
+ IF(
+ ARRAY_LENGTH(ping_info.experiments) = 0,
+ NULL,
+ JSON_OBJECT(
+ ARRAY(SELECT key FROM UNNEST(ping_info.experiments)),
+ ARRAY(SELECT value FROM UNNEST(ping_info.experiments))
+ )
+ ) AS experiments,
FROM
`firefox_desktop_background_update.events`
WHERE
@@ -105,10 +48,40 @@
{% else %}
DATE(submission_timestamp) = @submission_date
{% endif %}
+),
+json_metrics AS (
+ SELECT
+ * REPLACE (
+ JSON_STRIP_NULLS(
+ JSON_REMOVE(
+ -- labeled_* are the only ones that SHOULD show up as context for events pings,
+ -- thus we special-case them
+ --
+ -- The JSON_SET/JSON_EXTRACT shenanigans are needed
+ -- because those subfields might not exist, so accessing the columns would fail.
+ -- but accessing non-existent fields in a JSON object simply gives us NULL.
+ JSON_SET(
+ metrics,
+ '$.labeled_counter',
+ mozfun.json.from_nested_map(metrics.labeled_counter),
+ '$.labeled_string',
+ mozfun.json.from_nested_map(metrics.labeled_string),
+ '$.labeled_boolean',
+ mozfun.json.from_nested_map(metrics.labeled_boolean),
+ '$.url',
+ metrics.url2
+ ),
+ '$.url2'
+ ),
+ remove_empty => TRUE
+ ) AS metrics
+ )
+ FROM
+ base
)
--
SELECT
- base.* EXCEPT (events),
+ json_metrics.* EXCEPT (events),
COALESCE(
SAFE.TIMESTAMP_MILLIS(SAFE_CAST(mozfun.map.get_key(event.extra, 'glean_timestamp') AS INT64)),
SAFE.TIMESTAMP_ADD(ping_info.parsed_start_time, INTERVAL event.timestamp MILLISECOND)
@@ -116,8 +89,27 @@
event.category AS event_category,
event.name AS event_name,
ARRAY_TO_STRING([event.category, event.name], '.') AS event, -- handles NULL values better
- from_map_event_extra(event.extra) AS event_extra,
+ -- convert array of key value pairs to a json object, cast numbers and booleans if possible
+ IF(
+ ARRAY_LENGTH(event.extra) = 0,
+ NULL,
+ JSON_OBJECT(
+ ARRAY(SELECT key FROM UNNEST(event.extra)),
+ ARRAY(
+ SELECT
+ CASE
+ WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL
+ THEN TO_JSON(SAFE_CAST(value AS NUMERIC))
+ WHEN SAFE_CAST(value AS BOOL) IS NOT NULL
+ THEN TO_JSON(SAFE_CAST(value AS BOOL))
+ ELSE TO_JSON(value)
+ END
+ FROM
+ UNNEST(event.extra)
+ )
+ )
+ ) AS event_extra,
FROM
- base
+ json_metrics
CROSS JOIN
UNNEST(events) AS event
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/event_monitoring_live_v1/materialized_view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/event_monitoring_live_v1/materialized_view.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-27 22:37:40.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/event_monitoring_live_v1/materialized_view.sql 2024-05-27 22:40:48.000000000 +0000
@@ -50,7 +50,7 @@
client_info.app_display_version AS version,
ping_info
FROM
- `moz-fx-data-shared-prod.firefox_desktop_live.prototype_no_code_events_v1`
+ `moz-fx-data-shared-prod.firefox_desktop_live.newtab_v1`
UNION ALL
SELECT
submission_timestamp,
@@ -60,7 +60,7 @@
client_info.app_display_version AS version,
ping_info
FROM
- `moz-fx-data-shared-prod.firefox_desktop_live.events_v1`
+ `moz-fx-data-shared-prod.firefox_desktop_live.prototype_no_code_events_v1`
UNION ALL
SELECT
submission_timestamp,
@@ -80,7 +80,7 @@
client_info.app_display_version AS version,
ping_info
FROM
- `moz-fx-data-shared-prod.firefox_desktop_live.newtab_v1`
+ `moz-fx-data-shared-prod.firefox_desktop_live.events_v1`
)
CROSS JOIN
UNNEST(events) AS event,
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/events_stream_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/events_stream_v1/query.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/events_stream_v1/query.sql 2024-05-27 22:37:40.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/events_stream_v1/query.sql 2024-05-27 22:40:53.000000000 +0000
@@ -1,70 +1,4 @@
-- Generated via bigquery_etl.glean_usage
--- convert array of key value pairs to a json object, cast numbers and booleans if possible
-CREATE TEMP FUNCTION from_map_event_extra(input ARRAY>)
-RETURNS json AS (
- IF(
- ARRAY_LENGTH(input) = 0,
- NULL,
- JSON_OBJECT(
- ARRAY(SELECT key FROM UNNEST(input)),
- ARRAY(
- SELECT
- CASE
- WHEN SAFE_CAST(value AS NUMERIC) IS NOT NULL
- THEN TO_JSON(SAFE_CAST(value AS NUMERIC))
- WHEN SAFE_CAST(value AS BOOL) IS NOT NULL
- THEN TO_JSON(SAFE_CAST(value AS BOOL))
- ELSE TO_JSON(value)
- END
- FROM
- UNNEST(input)
- )
- )
- )
-);
-
--- convert array of key value pairs to a json object
--- values are nested structs and will be converted to json objects
-CREATE TEMP FUNCTION from_map_experiment(
- input ARRAY<
- STRUCT>>
- >
-)
-RETURNS json AS (
- IF(
- ARRAY_LENGTH(input) = 0,
- NULL,
- JSON_OBJECT(ARRAY(SELECT key FROM UNNEST(input)), ARRAY(SELECT value FROM UNNEST(input)))
- )
-);
-
-CREATE TEMP FUNCTION metrics_to_json(metrics JSON)
-RETURNS JSON AS (
- JSON_STRIP_NULLS(
- JSON_REMOVE(
- -- labeled_* are the only ones that SHOULD show up as context for events pings,
- -- thus we special-case them
- --
- -- The JSON_SET/JSON_EXTRACT shenanigans are needed
- -- because those subfields might not exist, so accessing the columns would fail.
- -- but accessing non-existent fields in a JSON object simply gives us NULL.
- JSON_SET(
- metrics,
- '$.labeled_counter',
- mozfun.json.from_nested_map(metrics.labeled_counter),
- '$.labeled_string',
- mozfun.json.from_nested_map(metrics.labeled_string),
- '$.labeled_boolean',
- mozfun.json.from_nested_map(metrics.labeled_boolean),
- '$.url',
- metrics.url2
- ),
- '$.url2'
- ),
- remove_empty => TRUE
- )
-);
-
WITH base AS (
SELECT
* REPLACE (
@@ -92,11 +26,20 @@
ping_info.parsed_end_time,
ping_info.ping_type
) AS ping_info,
- metrics_to_json(TO_JSON(metrics)) AS metrics
+ TO_JSON(metrics) AS metrics
),
client_info.client_id AS client_id,
ping_info.reason AS reason,
- from_map_experiment(ping_info.experiments) AS experiments,
+ -- convert array of key value pairs to a json object
+ -- values are nested structs and will be converted to json objects
+ IF(
+ ARRAY_LENGTH(ping_info.experiments) = 0,
+ NULL,
+ JSON_OBJECT(
+ ARRAY(SELECT key FROM UNNEST(ping_info.experiments)),
+ ARRAY(SELECT value FROM UNNEST(ping_info.experiments))
+ )
+ ) AS experiments,
FROM
`firefox_desktop.events`
WHERE
@@ -105,10 +48,40 @@
{% else %}
DATE(submission_timestamp) = @submission_date
{% endif %}
+),
+json_metrics AS (
+ SELECT
+ * REPLACE (
+ JSON_STRIP_NULLS(
+ JSON_REMOVE(
+ -- labeled_* are the only ones that SHOULD show up as context for events pings,
+ -- thus we special-case them
+ --
+ -- The JSON_SET/JSON_EXTRACT shenanigans are needed
+ -- because those subfields might not exist, so accessing the columns would fail.
+ -- but accessing non-existent fields in a JSON object simply gives us NULL.
+ JSON_SET(
+ metrics,
+ '$.labeled_counter',
+ mozfun.json.from_nested
```
Following up on https://github.com/mozilla/bigquery-etl/pull/5659, I found the billing project option doesn't work when there are temp udfs (DENG-3905). I'm working on a fix for that but I want to do this in the meantime to get the query working again.
Checklist for reviewer:
<username>:<branch>
of the fork as parameter. The parameter will also show up in the logs of themanual-trigger-required-for-fork
CI task together with more detailed instructions.For modifications to schemas in restricted namespaces (see
CODEOWNERS
):┆Issue is synchronized with this Jira Task