[ ] Commits should reference a bug or github issue, if relevant (if a bug is referenced, the pull request should include the bug number in the title).
[ ] If the PR comes from a fork, trigger integration CI tests by running the Push to upstream workflow and provide the <username>:<branch> of the fork as parameter. The parameter will also show up
in the logs of the manual-trigger-required-for-fork CI task together with more detailed instructions.
[ ] If adding a new field to a query, ensure that the schema and dependent downstream schemas have been updated.
[ ] When adding a new derived dataset, ensure that data is not available already (fully or partially) and recommend extending an existing dataset in favor of creating new ones. Data can be available in the bigquery-etl repository, looker-hub or in looker-spoke-default.
For modifications to schemas in restricted namespaces (see CODEOWNERS):
Integration report for "Add broken UDF for testing"
sql.diff
Click to expand!
```diff
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_generated_funnels.py /tmp/workspace/generated-sql/dags/bqetl_generated_funnels.py
--- /tmp/workspace/main-generated-sql/dags/bqetl_generated_funnels.py 2024-07-26 19:28:20.000000000 +0000
+++ /tmp/workspace/generated-sql/dags/bqetl_generated_funnels.py 2024-07-26 19:46:00.000000000 +0000
@@ -276,6 +276,21 @@
)
)
+ monitor_frontend_derived__monitor_dashboard_user_journey_funnels__v1 = bigquery_etl_query(
+ task_id="monitor_frontend_derived__monitor_dashboard_user_journey_funnels__v1",
+ destination_table="monitor_dashboard_user_journey_funnels_v1",
+ dataset_id="monitor_frontend_derived",
+ project_id="moz-fx-data-shared-prod",
+ owner="ksiegler@mozilla.org",
+ email=[
+ "ascholtz@mozilla.com",
+ "ksiegler@mozilla.org",
+ "telemetry-alerts@mozilla.com",
+ ],
+ date_partition_parameter="submission_date",
+ depends_on_past=False,
+ )
+
accounts_frontend_derived__email_first_reg_login_funnels__v1.set_upstream(
wait_for_copy_deduplicate_all
)
@@ -325,3 +340,7 @@
firefox_accounts_derived__registration_funnels_legacy_events__v1.set_upstream(
wait_for_firefox_accounts_derived__fxa_stdout_events__v1
)
+
+ monitor_frontend_derived__monitor_dashboard_user_journey_funnels__v1.set_upstream(
+ wait_for_copy_deduplicate_all
+ )
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_internet_outages.py /tmp/workspace/generated-sql/dags/bqetl_internet_outages.py
--- /tmp/workspace/main-generated-sql/dags/bqetl_internet_outages.py 2024-07-26 19:28:20.000000000 +0000
+++ /tmp/workspace/generated-sql/dags/bqetl_internet_outages.py 2024-07-26 19:45:57.000000000 +0000
@@ -88,12 +88,12 @@
pool="DATA_ENG_EXTERNALTASKSENSOR",
)
- checks__warn_internet_outages__global_outages__v1 = bigquery_dq_check(
- task_id="checks__warn_internet_outages__global_outages__v1",
+ checks__fail_internet_outages__global_outages__v1 = bigquery_dq_check(
+ task_id="checks__fail_internet_outages__global_outages__v1",
source_table="global_outages_v1",
dataset_id="internet_outages",
project_id="moz-fx-data-shared-prod",
- is_dq_check_fail=False,
+ is_dq_check_fail=True,
owner="aplacitelli@mozilla.com",
email=["aplacitelli@mozilla.com"],
depends_on_past=False,
@@ -101,9 +101,9 @@
retries=0,
)
- checks__warn_internet_outages__global_outages__v2 = bigquery_dq_check(
- task_id="checks__warn_internet_outages__global_outages__v2",
- source_table="global_outages_v2",
+ checks__warn_internet_outages__global_outages__v1 = bigquery_dq_check(
+ task_id="checks__warn_internet_outages__global_outages__v1",
+ source_table="global_outages_v1",
dataset_id="internet_outages",
project_id="moz-fx-data-shared-prod",
is_dq_check_fail=False,
@@ -125,25 +125,14 @@
depends_on_past=False,
)
- internet_outages__global_outages__v2 = bigquery_etl_query(
- task_id="internet_outages__global_outages__v2",
- destination_table="global_outages_v2",
- dataset_id="internet_outages",
- project_id="moz-fx-data-shared-prod",
- owner="aplacitelli@mozilla.com",
- email=["aplacitelli@mozilla.com"],
- date_partition_parameter="submission_date",
- depends_on_past=False,
+ checks__fail_internet_outages__global_outages__v1.set_upstream(
+ internet_outages__global_outages__v1
)
checks__warn_internet_outages__global_outages__v1.set_upstream(
internet_outages__global_outages__v1
)
- checks__warn_internet_outages__global_outages__v2.set_upstream(
- internet_outages__global_outages__v2
- )
-
internet_outages__global_outages__v1.set_upstream(wait_for_copy_deduplicate_all)
internet_outages__global_outages__v1.set_upstream(
@@ -153,13 +142,3 @@
internet_outages__global_outages__v1.set_upstream(
wait_for_telemetry_derived__clients_daily_joined__v1
)
-
- internet_outages__global_outages__v2.set_upstream(wait_for_copy_deduplicate_all)
-
- internet_outages__global_outages__v2.set_upstream(
- wait_for_copy_deduplicate_main_ping
- )
-
- internet_outages__global_outages__v2.set_upstream(
- wait_for_telemetry_derived__clients_daily_joined__v1
- )
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_mobile_search.py /tmp/workspace/generated-sql/dags/bqetl_mobile_search.py
--- /tmp/workspace/main-generated-sql/dags/bqetl_mobile_search.py 2024-07-26 19:28:20.000000000 +0000
+++ /tmp/workspace/generated-sql/dags/bqetl_mobile_search.py 2024-07-26 19:45:57.000000000 +0000
@@ -158,23 +158,6 @@
search_derived__mobile_search_clients_daily__v1
)
- search_derived__mobile_search_clients_daily__v2 = bigquery_etl_query(
- task_id="search_derived__mobile_search_clients_daily__v2",
- destination_table="mobile_search_clients_daily_v2",
- dataset_id="search_derived",
- project_id="moz-fx-data-shared-prod",
- owner="cmorales@mozilla.com",
- email=[
- "akomar@mozilla.com",
- "akommasani@mozilla.com",
- "anicholson@mozilla.com",
- "cmorales@mozilla.com",
- "telemetry-alerts@mozilla.com",
- ],
- date_partition_parameter="submission_date",
- depends_on_past=False,
- )
-
search_derived__mobile_search_clients_last_seen__v1 = bigquery_etl_query(
task_id="search_derived__mobile_search_clients_last_seen__v1",
destination_table="mobile_search_clients_last_seen_v1",
@@ -199,10 +182,6 @@
wait_for_copy_deduplicate_all
)
- search_derived__mobile_search_clients_daily__v2.set_upstream(
- wait_for_copy_deduplicate_all
- )
-
search_derived__mobile_search_clients_last_seen__v1.set_upstream(
search_derived__mobile_search_clients_daily__v1
)
Only in /tmp/workspace/generated-sql/sql/mozfun/addons: is_adblocker_broken
Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages: global_outages
Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages: global_outages_v2
Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_derived: mobile_search_clients_daily_v2
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/metadata.yaml /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/metadata.yaml
--- /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/metadata.yaml 1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/metadata.yaml 2024-07-26 19:22:57.000000000 +0000
@@ -0,0 +1,2 @@
+description: Returns whether a given Addon ID is an adblocker.
+friendly_name: Is Adblocker
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/README.md /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/README.md
--- /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/README.md 1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/README.md 2024-07-26 19:22:57.000000000 +0000
@@ -0,0 +1,16 @@
+Determine if a given Addon ID is for an adblocker.
+
+As an example, this query will give the number of users who
+have an adblocker installed.
+```
+SELECT
+ submission_date,
+ COUNT(DISTINCT client_id) AS dau,
+FROM
+ mozdata.telemetry.addons
+WHERE
+ mozfun.addons.is_adblocker(addon_id)
+ AND submission_date >= "2023-01-01"
+GROUP BY
+ submission_date
+```
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/udf.sql /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/udf.sql
--- /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/udf.sql 1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/udf.sql 2024-07-26 19:22:57.000000000 +0000
@@ -0,0 +1,19 @@
+CREATE OR REPLACE FUNCTION addons.is_adblocker_broken(addon_id STRING)
+RETURNS BOOLEAN AS (
+ addon_id IS NOT NULL
+ AND addon_id IN (
+ 'uBlock0@raymondhill.net', -- 'uBlock_Origin'
+ '{d10d0bf8-f5b5-c8b4-a8b2-2b9879e08c5d}', -- 'AdBlock_Plus'
+ 'jid1-NIfFY2CA8fy1tg@jetpack', -- 'Adblock'
+ 'firefox@ghostery.com', -- 'Ghostery'
+ 'adblockultimate@adblockultimate.net' -- 'AdBlock_Ultimate'
+ )
+);
+
+-- Tests
+SELECT
+ assert.true(addons.is_adblocker_broken('uBlock0@raymondhill.net')),
+ assert.true(addons.is_adblocker_broken('adblockultimate@adblockultimate.net')),
+ assert.false(addons.is_adblocker_broken('failing test')),
+ assert.false(addons.is_adblocker_broken('')),
+ assert.false(addons.is_adblocker_broken(NULL)),
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml 2024-07-26 19:22:07.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml 2024-07-26 19:33:47.000000000 +0000
@@ -1,49 +1,49 @@
fields:
-- mode: NULLABLE
- name: submission_date
+- name: submission_date
type: DATE
-- mode: NULLABLE
- name: source
+ mode: NULLABLE
+- name: source
type: STRING
-- mode: NULLABLE
- name: event_type
+ mode: NULLABLE
+- name: event_type
type: STRING
-- mode: NULLABLE
- name: form_factor
+ mode: NULLABLE
+- name: form_factor
type: STRING
-- mode: NULLABLE
- name: country
+ mode: NULLABLE
+- name: country
type: STRING
-- mode: NULLABLE
- name: subdivision1
+ mode: NULLABLE
+- name: subdivision1
type: STRING
-- mode: NULLABLE
- name: advertiser
+ mode: NULLABLE
+- name: advertiser
type: STRING
-- mode: NULLABLE
- name: release_channel
+ mode: NULLABLE
+- name: release_channel
type: STRING
-- mode: NULLABLE
- name: position
+ mode: NULLABLE
+- name: position
type: INTEGER
-- mode: NULLABLE
- name: provider
+ mode: NULLABLE
+- name: provider
type: STRING
-- mode: NULLABLE
- name: match_type
+ mode: NULLABLE
+- name: match_type
type: STRING
-- mode: NULLABLE
- name: normalized_os
+ mode: NULLABLE
+- name: normalized_os
type: STRING
-- mode: NULLABLE
- name: suggest_data_sharing_enabled
+ mode: NULLABLE
+- name: suggest_data_sharing_enabled
type: BOOLEAN
-- mode: NULLABLE
- name: event_count
+ mode: NULLABLE
+- name: event_count
type: INTEGER
-- mode: NULLABLE
- name: user_count
+ mode: NULLABLE
+- name: user_count
type: INTEGER
-- mode: NULLABLE
- name: query_type
+ mode: NULLABLE
+- name: query_type
type: STRING
+ mode: NULLABLE
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml 2024-07-26 19:22:07.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml 2024-07-26 19:33:47.000000000 +0000
@@ -1,40 +1,40 @@
fields:
-- mode: NULLABLE
- name: submission_date
+- name: submission_date
type: DATE
-- mode: NULLABLE
- name: form_factor
+ mode: NULLABLE
+- name: form_factor
type: STRING
-- mode: NULLABLE
- name: country
+ mode: NULLABLE
+- name: country
type: STRING
-- mode: NULLABLE
- name: advertiser
+ mode: NULLABLE
+- name: advertiser
type: STRING
-- mode: NULLABLE
- name: normalized_os
+ mode: NULLABLE
+- name: normalized_os
type: STRING
-- mode: NULLABLE
- name: release_channel
+ mode: NULLABLE
+- name: release_channel
type: STRING
-- mode: NULLABLE
- name: position
+ mode: NULLABLE
+- name: position
type: INTEGER
-- mode: NULLABLE
- name: provider
+ mode: NULLABLE
+- name: provider
type: STRING
-- mode: NULLABLE
- name: match_type
+ mode: NULLABLE
+- name: match_type
type: STRING
-- mode: NULLABLE
- name: suggest_data_sharing_enabled
+ mode: NULLABLE
+- name: suggest_data_sharing_enabled
type: BOOLEAN
-- mode: NULLABLE
- name: impression_count
+ mode: NULLABLE
+- name: impression_count
type: INTEGER
-- mode: NULLABLE
- name: click_count
+ mode: NULLABLE
+- name: click_count
type: INTEGER
-- mode: NULLABLE
- name: query_type
+ mode: NULLABLE
+- name: query_type
type: STRING
+ mode: NULLABLE
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml 2024-07-26 19:22:07.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml 2024-07-26 19:32:35.000000000 +0000
@@ -26,6 +26,9 @@
- name: adjust_network
type: STRING
mode: NULLABLE
+- name: install_source
+ type: STRING
+ mode: NULLABLE
- name: retained_week_2
type: BOOLEAN
mode: NULLABLE
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml 2024-07-26 19:22:07.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml 2024-07-26 19:32:36.000000000 +0000
@@ -48,6 +48,10 @@
description: 'The type of source of a client installation.
'
+- name: install_source
+ type: STRING
+ mode: NULLABLE
+ description: null
- name: new_profiles
type: INTEGER
mode: NULLABLE
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/use_counters/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/use_counters/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/use_counters/schema.yaml 2024-07-26 19:22:45.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/use_counters/schema.yaml 2024-07-26 19:33:28.000000000 +0000
@@ -1,11 +1,11 @@
fields:
- name: normalized_app_id
- mode: NULLABLE
type: STRING
+ mode: NULLABLE
description: App ID of the channel data was received from
- name: normalized_channel
- mode: NULLABLE
type: STRING
+ mode: NULLABLE
description: Normalized channel name
- name: additional_properties
type: STRING
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml 2024-07-26 19:22:07.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml 2024-07-26 19:31:31.000000000 +0000
@@ -6,9 +6,10 @@
- name: site_url
type: STRING
mode: NULLABLE
- description: |-
- For domain properties, this will be `sc-domain:` followed by the domain name.
- For URL-prefix properties, it will be the full URL of the property definition.
+ description: 'For domain properties, this will be `sc-domain:` followed by the domain
+ name.
+
+ For URL-prefix properties, it will be the full URL of the property definition.'
- name: site_domain_name
type: STRING
mode: NULLABLE
@@ -16,103 +17,110 @@
- name: page_url
type: STRING
mode: NULLABLE
- description: |-
- The final page URL linked by a search result after any skip redirects.
- This will be null for anonymized Discover impressions.
+ description: 'The final page URL linked by a search result after any skip redirects.
+
+ This will be null for anonymized Discover impressions.'
- name: page_domain_name
type: STRING
mode: NULLABLE
- description: |-
- Domain name of the page URL.
- This will be null for anonymized Discover impressions.
+ description: 'Domain name of the page URL.
+
+ This will be null for anonymized Discover impressions.'
- name: page_path
type: STRING
mode: NULLABLE
- description: |-
- The path part of the page URL.
- This will be null for anonymized Discover impressions.
+ description: 'The path part of the page URL.
+
+ This will be null for anonymized Discover impressions.'
- name: localized_site_code
type: STRING
mode: NULLABLE
- description: |-
- Localized site code such as `en-US` or `de` found in the first segment of the page URL path (if any).
- This will be null for anonymized Discover impressions.
+ description: 'Localized site code such as `en-US` or `de` found in the first segment
+ of the page URL path (if any).
+
+ This will be null for anonymized Discover impressions.'
- name: localized_site
type: STRING
mode: NULLABLE
- description: |-
- Description of the localized site language and/or country based on `localized_site_code` (if any).
- This will be null for anonymized Discover impressions.
+ description: 'Description of the localized site language and/or country based on
+ `localized_site_code` (if any).
+
+ This will be null for anonymized Discover impressions.'
- name: localized_site_language_code
type: STRING
mode: NULLABLE
- description: |-
- Localized site language code in ISO-639-alpha-2 format found in the first segment of the page URL path (if any).
- This will be null for anonymized Discover impressions.
+ description: 'Localized site language code in ISO-639-alpha-2 format found in the
+ first segment of the page URL path (if any).
+
+ This will be null for anonymized Discover impressions.'
- name: localized_site_language
type: STRING
mode: NULLABLE
- description: |-
- Localized site language based on `localized_site_language_code` (if any).
- This will be null for anonymized Discover impressions.
+ description: 'Localized site language based on `localized_site_language_code` (if
+ any).
+
+ This will be null for anonymized Discover impressions.'
- name: query
type: STRING
mode: NULLABLE
- description: |-
- The search query.
- This will be null for anonymized search impressions, and all Discover and Google News search impressions.
+ description: 'The search query.
+
+ This will be null for anonymized search impressions, and all Discover and Google
+ News search impressions.'
- name: query_type
type: STRING
mode: NULLABLE
- description: |-
- Type of search query:
- * Anonymized: Query was redacted by Google to protect the users' privacy.
- * Brand: Query contained one or more Mozilla brand keywords.
- * Non-Brand: Query didn't contain any Mozilla brand keywords.
- * Unknown: Query couldn't be classified.
- This will be null for all Discover and Google News search impressions.
+ description: "Type of search query:\n * Anonymized: Query was redacted by Google\
+ \ to protect the users' privacy.\n * Brand: Query contained one or more Mozilla\
+ \ brand keywords.\n * Non-Brand: Query didn't contain any Mozilla brand keywords.\n\
+ \ * Unknown: Query couldn't be classified.\nThis will be null for all Discover\
+ \ and Google News search impressions."
- name: is_anonymized
type: BOOLEAN
mode: NULLABLE
- description: |-
- Whether Google has anonymized the search impression to protect the users' privacy.
+ description: 'Whether Google has anonymized the search impression to protect the
+ users'' privacy.
+
The `query` field will be null for anonymized search impressions.
- The `country_code`, `page_url`, and related fields will be null for anonymized Discover impressions.
+
+ The `country_code`, `page_url`, and related fields will be null for anonymized
+ Discover impressions.'
- name: has_good_page_experience
type: BOOLEAN
mode: NULLABLE
- description: |-
- Whether Google Search considers the page to be providing a good page experience.
- This will be null when the source data wasn't exported directly to BigQuery by Google.
+ description: 'Whether Google Search considers the page to be providing a good page
+ experience.
+
+ This will be null when the source data wasn''t exported directly to BigQuery by
+ Google.'
- name: search_type
type: STRING
mode: NULLABLE
- description: |-
- Where the link was seen by the user:
- * Web: In Google Search's default "All" tab.
- * Image: In Google Search's "Images" tab.
- * Video: In Google Search's "Videos" tab.
- * News: In Google Search's "News" tab.
- * Discover: In Google's Discover feed.
- * Google News: On news.google.com or in the Google News app on Android and iOS.
+ description: "Where the link was seen by the user:\n * Web: In Google Search's\
+ \ default \"All\" tab.\n * Image: In Google Search's \"Images\" tab.\n * Video:\
+ \ In Google Search's \"Videos\" tab.\n * News: In Google Search's \"News\" tab.\n\
+ \ * Discover: In Google's Discover feed.\n * Google News: On news.google.com\
+ \ or in the Google News app on Android and iOS."
- name: search_appearance
type: STRING
mode: NULLABLE
- description: |-
- How the search result appeared (e.g. normal result, translated result, video).
- This will be null when the source data wasn't exported directly to BigQuery by Google.
+ description: 'How the search result appeared (e.g. normal result, translated result,
+ video).
+
+ This will be null when the source data wasn''t exported directly to BigQuery by
+ Google.'
- name: user_country_code
type: STRING
mode: NULLABLE
- description: |-
- Country from which the user was searching, in ISO-3166-1-alpha-3 format.
- This will be null for anonymized Discover impressions.
+ description: 'Country from which the user was searching, in ISO-3166-1-alpha-3 format.
+
+ This will be null for anonymized Discover impressions.'
- name: user_country
type: STRING
mode: NULLABLE
- description: |-
- Country from which the user was searching.
- This will be null for anonymized Discover impressions.
+ description: 'Country from which the user was searching.
+
+ This will be null for anonymized Discover impressions.'
- name: user_region
type: STRING
mode: NULLABLE
@@ -124,13 +132,15 @@
- name: device_type
type: STRING
mode: NULLABLE
- description: |-
- The type of device on which the user was searching: Desktop, Mobile, or Tablet.
- This will be null for Discover impressions.
+ description: 'The type of device on which the user was searching: Desktop, Mobile,
+ or Tablet.
+
+ This will be null for Discover impressions.'
- name: impressions
type: INTEGER
mode: NULLABLE
- description: The number of times that search results with a link to the page were shown to a user.
+ description: The number of times that search results with a link to the page were
+ shown to a user.
- name: clicks
type: INTEGER
mode: NULLABLE
@@ -138,6 +148,7 @@
- name: average_position
type: FLOAT
mode: NULLABLE
- description: |-
- The average position of the page in the search results, where `1` is the topmost position.
- This will be null for Discover and Google News search impressions.
+ description: 'The average position of the page in the search results, where `1`
+ is the topmost position.
+
+ This will be null for Discover and Google News search impressions.'
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/schema.yaml 2024-07-26 19:22:07.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/schema.yaml 2024-07-26 19:31:31.000000000 +0000
@@ -6,9 +6,10 @@
- name: site_url
type: STRING
mode: NULLABLE
- description: |-
- For domain properties, this will be `sc-domain:` followed by the domain name.
- For URL-prefix properties, it will be the full URL of the property definition.
+ description: 'For domain properties, this will be `sc-domain:` followed by the domain
+ name.
+
+ For URL-prefix properties, it will be the full URL of the property definition.'
- name: site_domain_name
type: STRING
mode: NULLABLE
@@ -20,27 +21,23 @@
- name: query_type
type: STRING
mode: NULLABLE
- description: |-
- Type of search query:
- * Anonymized: Query was redacted by Google to protect the users' privacy.
- * Brand: Query contained one or more Mozilla brand keywords.
- * Non-Brand: Query didn't contain any Mozilla brand keywords.
- * Unknown: Query couldn't be classified.
+ description: "Type of search query:\n * Anonymized: Query was redacted by Google\
+ \ to protect the users' privacy.\n * Brand: Query contained one or more Mozilla\
+ \ brand keywords.\n * Non-Brand: Query didn't contain any Mozilla brand keywords.\n\
+ \ * Unknown: Query couldn't be classified."
- name: is_anonymized
type: BOOLEAN
mode: NULLABLE
- description: |-
- Whether Google has anonymized the search impression to protect the users' privacy.
- The `query` field will be null for anonymized search impressions.
+ description: 'Whether Google has anonymized the search impression to protect the
+ users'' privacy.
+
+ The `query` field will be null for anonymized search impressions.'
- name: search_type
type: STRING
mode: NULLABLE
- description: |-
- Where the link was seen by the user:
- * Web: In Google Search's default "All" tab.
- * Image: In Google Search's "Images" tab.
- * Video: In Google Search's "Videos" tab.
- * News: In Google Search's "News" tab.
+ description: "Where the link was seen by the user:\n * Web: In Google Search's\
+ \ default \"All\" tab.\n * Image: In Google Search's \"Images\" tab.\n * Video:\
+ \ In Google Search's \"Videos\" tab.\n * News: In Google Search's \"News\" tab."
- name: user_country_code
type: STRING
mode: NULLABLE
@@ -60,17 +57,20 @@
- name: device_type
type: STRING
mode: NULLABLE
- description: |-
- The type of device on which the user was searching: Desktop, Mobile, or Tablet.
+ description: 'The type of device on which the user was searching: Desktop, Mobile,
+ or Tablet.'
- name: impressions
type: INTEGER
mode: NULLABLE
- description: The number of times that search results with at least one link to the site were shown to a user.
+ description: The number of times that search results with at least one link to the
+ site were shown to a user.
- name: clicks
type: INTEGER
mode: NULLABLE
- description: The number of times a user clicked at least one search result link to the site.
+ description: The number of times a user clicked at least one search result link
+ to the site.
- name: average_top_position
type: FLOAT
mode: NULLABLE
- description: The average top position of the site in the search results, where `1` is the topmost position.
+ description: The average top position of the site in the search results, where `1`
+ is the topmost position.
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/metadata.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/metadata.yaml 2024-07-26 19:22:45.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/metadata.yaml 1970-01-01 00:00:00.000000000 +0000
@@ -1,15 +0,0 @@
-friendly_name: Global Outages
-description: |-
- Please provide a description for the query
-owners: []
-labels: {}
-bigquery: null
-workgroup_access:
-- role: roles/bigquery.dataViewer
- members:
- - workgroup:mozilla-confidential
- - workgroup:internet-outages/external
-references:
- view.sql:
- - moz-fx-data-shared-prod.internet_outages.global_outages_v1
- - moz-fx-data-shared-prod.internet_outages.global_outages_v2
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/schema.yaml 2024-07-26 19:22:45.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/schema.yaml 1970-01-01 00:00:00.000000000 +0000
@@ -1,58 +0,0 @@
-fields:
-- name: country
- type: STRING
- mode: NULLABLE
-- name: city
- type: STRING
- mode: NULLABLE
-- name: geo_subdivision1
- type: STRING
- mode: NULLABLE
-- name: geo_subdivision2
- type: STRING
- mode: NULLABLE
-- name: datetime
- type: TIMESTAMP
- mode: NULLABLE
-- name: proportion_undefined
- type: FLOAT
- mode: NULLABLE
-- name: proportion_timeout
- type: FLOAT
- mode: NULLABLE
-- name: proportion_abort
- type: FLOAT
- mode: NULLABLE
-- name: proportion_unreachable
- type: FLOAT
- mode: NULLABLE
-- name: proportion_terminated
- type: FLOAT
- mode: NULLABLE
-- name: proportion_channel_open
- type: FLOAT
- mode: NULLABLE
-- name: avg_dns_success_time
- type: FLOAT
- mode: NULLABLE
-- name: missing_dns_success
- type: FLOAT
- mode: NULLABLE
-- name: avg_dns_failure_time
- type: FLOAT
- mode: NULLABLE
-- name: missing_dns_failure
- type: FLOAT
- mode: NULLABLE
-- name: count_dns_failure
- type: FLOAT
- mode: NULLABLE
-- name: ssl_error_prop
- type: FLOAT
- mode: NULLABLE
-- name: avg_tls_handshake_time
- type: FLOAT
- mode: NULLABLE
-- name: dataset_version
- type: STRING
- mode: NULLABLE
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/view.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/view.sql 2024-07-26 19:22:08.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/view.sql 1970-01-01 00:00:00.000000000 +0000
@@ -1,52 +0,0 @@
-CREATE OR REPLACE VIEW
- `moz-fx-data-shared-prod.internet_outages.global_outages`
-AS
-SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- `datetime`,
- proportion_undefined,
- proportion_timeout,
- proportion_abort,
- proportion_unreachable,
- proportion_terminated,
- proportion_channel_open,
- avg_dns_success_time,
- missing_dns_success,
- avg_dns_failure_time,
- missing_dns_failure,
- count_dns_failure,
- ssl_error_prop,
- avg_tls_handshake_time,
- "v2" AS dataset_version,
-FROM
- `moz-fx-data-shared-prod.internet_outages.global_outages_v2`
-WHERE
- DATE(`datetime`) >= "2024-07-26"
-UNION ALL
-SELECT
- country,
- city,
- CAST(NULL AS STRING) AS geo_subdivision1,
- CAST(NULL AS STRING) AS geo_subdivision2,
- `datetime`,
- proportion_undefined,
- proportion_timeout,
- proportion_abort,
- proportion_unreachable,
- proportion_terminated,
- proportion_channel_open,
- avg_dns_success_time,
- missing_dns_success,
- avg_dns_failure_time,
- missing_dns_failure,
- count_dns_failure,
- ssl_error_prop,
- avg_tls_handshake_time,
- "v1" AS dataset_version,
-FROM
- `moz-fx-data-shared-prod.internet_outages.global_outages_v1`
-WHERE
- DATE(`datetime`) < "2024-07-26"
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/checks.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/checks.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/checks.sql 2024-07-26 19:22:08.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/checks.sql 2024-07-26 19:22:57.000000000 +0000
@@ -1,10 +1,10 @@
-#warn
+#fail
{{ min_row_count(1000, where="DATE(datetime) = @submission_date") }}
-#warn
+#fail
{{ is_unique(columns=["datetime", "city", "country"], where="DATE(`datetime`) = @submission_date") }}
-#warn
+#fail
/*
This statement used to contain the following fields,
but these are sometimes missing from country/city combinations
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/metadata.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/metadata.yaml 2024-07-26 19:23:59.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/metadata.yaml 2024-07-26 19:41:12.000000000 +0000
@@ -1,4 +1,4 @@
-friendly_name: Internet Outages (Version 1)
+friendly_name: Internet Outages
description: |-
This contains a set aggregated metrics that correlate to internet
outages for different countries in the world.
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/checks.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/checks.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/checks.sql 2024-07-26 19:22:08.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/checks.sql 1970-01-01 00:00:00.000000000 +0000
@@ -1,38 +0,0 @@
-{% set _WHERE = '@submission_date = @submission_date' %}
-
-#warn
-{{ min_row_count(1000, where=_WHERE) }}
-
-#warn
-{{ is_unique(columns=["country", "city", "geo_subdivision1", "geo_subdivision2", "datetime"], where=_WHERE) }}
-
-#warn
-/*
- This statement used to contain the following fields,
- but these are sometimes missing from country/city combinations
- See https://sql.telemetry.mozilla.org/queries/96541/source
- and bug 1868674
-
- "avg_tls_handshake_time"
- "count_dns_failure"
-*/
-{{ not_null(columns=[
- "country",
- "city",
- "datetime",
- "proportion_undefined",
- "proportion_timeout",
- "proportion_abort",
- "proportion_unreachable",
- "proportion_terminated",
- "proportion_channel_open",
- "avg_dns_success_time",
- "missing_dns_success",
- "avg_dns_failure_time",
- "missing_dns_failure",
- "ssl_error_prop",
-
-], where=_WHERE) }}
-
-#warn
-{{ value_length(column="country", expected_length=2, where=_WHERE) }}
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/metadata.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/metadata.yaml 2024-07-26 19:23:59.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/metadata.yaml 1970-01-01 00:00:00.000000000 +0000
@@ -1,98 +0,0 @@
-friendly_name: Internet Outages (version 2)
-description: |-
- Note: Version 2 contains two additional fields: geo_subdivision1 && geo_subdivision2 for more accurate data grouping
- and the data available starting 2024/07/26.
-
- This contains a set aggregated metrics that correlate to internet
- outages for different countries in the world.
-
- The dataset contains the following fields:
- - `country`:
- the Country code of the client.
- - `city`:
- the City name (only for cities with a population >= 15000, 'unknown' otherwise).
- - `geo_subdivision1`:
- Country subdivision (1st level) corresponds, for example, to US states, Canadian provinces,
- French regions, Spanish autonomous communities, and Japanese prefectures. Geo charts do not support FIPS or NUTS codes.
- - `geo_subdivision2`:
- Country subdivision (2nd level) corresponds, for example, to US counties, French departments, and Italian and Spanish provinces.
- - `datetime`:
- the date and the time (truncated to hour) the data was submitted by the client.
- - `proportion_undefined`:
- the proportion of users who failed to send telemetry for a reason that was not listed in the other cases.
- - `proportion_timeout`:
- the proportion of users that had their connection timeout while uploading telemetry
- ([after 90s, in Firefox Desktop](https://searchfox.org/mozilla-central/rev/fa2df28a49883612bd7af4dacd80cdfedcccd2f6/toolkit/components/telemetry/app/TelemetrySend.jsm#81)).
- - `proportion_abort`:
- the proportion of users that had their connection terminated by the client (for example, terminating open connections before shutting down).
- - `proportion_unreachable`:
- the proportion of users that failed to upload telemetry because the server was not reachable
- (e.g. because the host was not reachable, proxy problems or OS waking up after a suspension).
- - `proportion_terminated`:
- the proportion of users that had their connection terminated internally by the networking code.
- - `proportion_channel_open`:
- the proportion of users for which the upload request was terminated immediately, by the client, because of a Necko internal error.
- - `avg_dns_success_time`:
- the average time it takes for a successful DNS resolution, in milliseconds.
- - `missing_dns_success`:
- counts how many sessions did not report the `DNS_LOOKUP_TIME` histogram.
- - `avg_dns_failure_time`:
- the average time it takes for an unsuccessful DNS resolution, in milliseconds.
- - `missing_dns_failure`:
- counts how many sessions did not report the `DNS_FAILED_LOOKUP_TIME` histogram.
- - `count_dns_failure`:
- the average count of unsuccessful DNS resolutions reported.
- - `ssl_error_prop`:
- the proportion of users that reported an error through the `SSL_CERT_VERIFICATION_ERRORS` histogram.
- - `avg_tls_handshake_time`:
- the average time after the TCP SYN to ready for HTTP, in milliseconds.
-
- Caveats with the data:
- As with any observational data, there are many caveats and interpretation must
- be done carefully. Below is a list of issues we have considered, but it is not
- exhaustive.
- - Firefox users are not representative of the general population in their
- region.
- - Users can experience multiple types of failures and so the proportions
- are not summable. For example, if 2.4% of clients had a timeout and 2.6% of
- clients had eUnreachable that doesn't necessarily mean that 5.0% of clients
- had a timeout or a eUnreachable
- - Geographical data is based on IPGeo databases. These databases are
- imperfect, so some activity may be attributed to the wrong location.
- Further, proxy and VPN usage can create geo-attribution errors.
- - Data contains duplicates prior to mid-2022. See [this bug](https://bugzilla.mozilla.org/show_bug.cgi?id=1753470)
- for further information.
-owners:
-- aplacitelli@mozilla.com
-labels:
- incremental: true
- review_bugs:
- - '1640204'
- dag: bqetl_internet_outages
- owner1: aplacitelli
-scheduling:
- dag_name: bqetl_internet_outages
-bigquery:
- time_partitioning:
- type: day
- field: datetime
- require_partition_filter: false
- expiration_days: null
- range_partitioning: null
- clustering:
- fields:
- - country
- - city
-workgroup_access:
-- role: roles/bigquery.dataViewer
- members:
- - workgroup:mozilla-confidential
- - workgroup:internet-outages/external
-references:
- checks.sql:
- - ..
- query.sql:
- - C
- - moz-fx-data-shared-prod.telemetry.clients_daily
- - moz-fx-data-shared-prod.telemetry.health
- - moz-fx-data-shared-prod.telemetry_stable.main_v5
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/query.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/query.sql 2024-07-26 19:22:08.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/query.sql 1970-01-01 00:00:00.000000000 +0000
@@ -1,557 +0,0 @@
--- This sums the values reported by an histogram.
-CREATE TEMP FUNCTION sum_values(x ARRAY>) AS (
- (
- WITH a AS (
- SELECT
- IF(ARRAY_LENGTH(x) > 0, 1, 0) AS isPres1
- ),
- b AS (
- SELECT
- -- Truncate pathological values that are beyond the documented limits per
- -- https://firefox-source-docs.mozilla.org/toolkit/components/telemetry/collection/histograms.html#histogram-values
- SUM(LEAST(2147483648, value)) AS t
- FROM
- UNNEST(x)
- WHERE
- key > 0
- )
- SELECT
- COALESCE(isPres1 * t, 0)
- FROM
- a,
- b
- )
-);
-
--- This counts how many times an histogram is not present.
--- It checks if the histogram is present at all and whether or not it recorded
--- any non-zero value.
-CREATE TEMP FUNCTION empty(x ARRAY>) AS (
- (
- WITH a AS (
- SELECT
- IF(ARRAY_LENGTH(x) = 0, 1, 0) AS isEmpty1
- ),
- b AS (
- SELECT
- IF(MAX(value) = 0, 1, 0) isEmpty2
- FROM
- UNNEST(x)
- ),
- c AS (
- SELECT
- IF(isEmpty2 = 1 OR isEmpty1 = 1, 1, 0) AS Empty
- FROM
- a,
- b
- )
- SELECT
- Empty
- FROM
- C
- )
-);
-
--- Get a stable source for DAUs.
-WITH DAUs AS (
- SELECT
- `moz-fx-data-shared-prod.udf.geo_struct_set_defaults`(
- country,
- city,
- geo_subdivision1,
- geo_subdivision2
- ).*,
- -- Truncate the submission timestamp to the hour. Note that this filed was
- -- introduced on the 16th December 2019, so it will be `null` for queries
- -- before that day. See https://github.com/mozilla/bigquery-etl/pull/603 .
- TIMESTAMP_TRUNC(submission_timestamp_min, HOUR) AS `datetime`,
- COUNT(*) AS client_count
- FROM
- `moz-fx-data-shared-prod.telemetry.clients_daily`
- WHERE
- submission_date = @submission_date
- -- Country can be null or `??` if geoip lookup failed.
- -- There's no point in adding these to the analyses.
- -- Due to a bug in `telemetry.clients_daily` we need to
- -- check for '??' as well in addition to null.
- GROUP BY
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- `datetime`
- -- Filter filter out cities for which we have less than or equal to
- -- 50 hourly active users. This will make sure data won't end up in
- -- the final table.
- HAVING
- country <> "??"
- AND client_count > 50
-),
--- Compute aggregates for the health data.
-health_data_sample AS (
- SELECT
- `moz-fx-data-shared-prod.udf.geo_struct_set_defaults`(
- metadata.geo.country,
- metadata.geo.city,
- metadata.geo.subdivision1, -- returned field gets renamed to geo_subdivision1
- metadata.geo.subdivision2 -- returned field gets renamed to geo_subdivision2
- ).*,
- TIMESTAMP_TRUNC(submission_timestamp, HOUR) AS `datetime`,
- client_id,
- SUM(
- COALESCE(
- SAFE_CAST(JSON_EXTRACT(additional_properties, '$.payload.sendFailure.undefined') AS INT64),
- 0
- )
- ) AS e_undefined,
- SUM(
- COALESCE(
- SAFE_CAST(JSON_EXTRACT(additional_properties, '$.payload.sendFailure.timeout') AS INT64),
- 0
- )
- ) AS e_timeout,
- SUM(
- COALESCE(
- SAFE_CAST(JSON_EXTRACT(additional_properties, '$.payload.sendFailure.abort') AS INT64),
- 0
- )
- ) AS e_abort,
- SUM(
- COALESCE(
- SAFE_CAST(
- JSON_EXTRACT(additional_properties, '$.payload.sendFailure.eUnreachable') AS INT64
- ),
- 0
- )
- ) AS e_unreachable,
- SUM(
- COALESCE(
- SAFE_CAST(
- JSON_EXTRACT(additional_properties, '$.payload.sendFailure.eTerminated') AS INT64
- ),
- 0
- )
- ) AS e_terminated,
- SUM(
- COALESCE(
- SAFE_CAST(
- JSON_EXTRACT(additional_properties, '$.payload.sendFailure.eChannelOpen') AS INT64
- ),
- 0
- )
- ) AS e_channel_open,
- FROM
- `moz-fx-data-shared-prod.telemetry.health`
- WHERE
- DATE(submission_timestamp) = @submission_date
- GROUP BY
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- `datetime`,
- client_id
- HAVING
- country <> "??"
-),
-health_data_aggregates AS (
- SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- `datetime`,
- COUNTIF(e_undefined > 0) AS num_clients_e_undefined,
- COUNTIF(e_timeout > 0) AS num_clients_e_timeout,
- COUNTIF(e_abort > 0) AS num_clients_e_abort,
- COUNTIF(e_unreachable > 0) AS num_clients_e_unreachable,
- COUNTIF(e_terminated > 0) AS num_clients_e_terminated,
- COUNTIF(e_channel_open > 0) AS num_clients_e_channel_open,
- FROM
- health_data_sample
- GROUP BY
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- `datetime`
- HAVING
- COUNT(*) > 50
-),
-final_health_data AS (
- SELECT
- h.country,
- h.city,
- h.geo_subdivision1,
- h.geo_subdivision2,
- h.datetime,
- (num_clients_e_undefined / DAUs.client_count) AS proportion_undefined,
- (num_clients_e_timeout / DAUs.client_count) AS proportion_timeout,
- (num_clients_e_abort / DAUs.client_count) AS proportion_abort,
- (num_clients_e_unreachable / DAUs.client_count) AS proportion_unreachable,
- (num_clients_e_terminated / DAUs.client_count) AS proportion_terminated,
- (num_clients_e_channel_open / DAUs.client_count) AS proportion_channel_open,
- FROM
- health_data_aggregates AS h
- INNER JOIN
- DAUs
- USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2)
-),
--- Compute aggregates for histograms coming from the health ping.
-histogram_data_sample AS (
- SELECT
- `moz-fx-data-shared-prod.udf.geo_struct_set_defaults`(
- metadata.geo.country,
- metadata.geo.city,
- metadata.geo.subdivision1, -- returned field gets renamed to geo_subdivision1
- metadata.geo.subdivision2 -- returned field gets renamed to geo_subdivision2
- ).*,
- client_id,
- document_id,
- TIMESTAMP_TRUNC(submission_timestamp, HOUR) AS time_slot,
- payload.info.subsession_length AS subsession_length,
- mozfun.hist.extract(payload.histograms.dns_failed_lookup_time).values AS dns_fail,
- mozfun.hist.extract(payload.histograms.dns_lookup_time).values AS dns_success,
- mozfun.hist.extract(payload.histograms.ssl_cert_verification_errors).values AS ssl_cert_errors,
- mozfun.hist.extract(
- payload.processes.content.histograms.http_page_tls_handshake
- ).values AS tls_handshake,
- FROM
- `moz-fx-data-shared-prod.telemetry_stable.main_v5`
- WHERE
- DATE(submission_timestamp) = @submission_date
- -- Restrict to Firefox.
- AND normalized_app_name = 'Firefox'
- -- Only to pings who seem to represent an active session.
- AND payload.info.subsession_length >= 0
- -- Country can be null if geoip lookup failed.
- -- There's no point in adding these to the analyses.
- AND metadata.geo.country IS NOT NULL
-),
--- DNS_SUCCESS histogram
-dns_success_time AS (
- SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- time_slot AS `datetime`,
- EXP(SUM(LOG(key) * count) / SUM(count)) AS value
- FROM
- (
- SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- client_id,
- time_slot,
- key,
- SUM(LEAST(2147483648, value)) AS count
- FROM
- histogram_data_sample
- CROSS JOIN
- UNNEST(histogram_data_sample.dns_success)
- GROUP BY
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- time_slot,
- client_id,
- key
- )
- WHERE
- key > 0
- GROUP BY
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- `datetime`
- HAVING
- COUNT(*) > 50
-),
--- Oddness: active sessions without DNS_LOOKUP_TIME
-dns_no_dns_lookup_time AS (
- SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- time_slot AS `datetime`,
- SUM(IF(subsession_length > 0 AND is_empty = 1, 1, 0)) / (
- 1 + SUM(IF(subsession_length > 0, 1, 0))
- ) AS value
- FROM
- (
- SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- client_id,
- time_slot,
- subsession_length,
- empty(dns_success) AS is_empty
- FROM
- histogram_data_sample
- )
- GROUP BY
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- `datetime`
- HAVING
- COUNT(*) > 50
-),
--- A shared source for the DNS_FAIL histogram
-dns_failure_src AS (
- SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- client_id,
- time_slot,
- key,
- SUM(LEAST(2147483648, value)) AS count
- FROM
- histogram_data_sample
- CROSS JOIN
- UNNEST(histogram_data_sample.dns_fail)
- GROUP BY
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- time_slot,
- client_id,
- key
-),
--- DNS_FAIL histogram
-dns_failure_time AS (
- SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- time_slot AS `datetime`,
- EXP(SUM(LOG(key) * count) / SUM(count)) AS value
- FROM
- dns_failure_src
- WHERE
- key > 0
- GROUP BY
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- `datetime`
- HAVING
- COUNT(*) > 50
-),
--- DNS_FAIL counts
-dns_failure_counts AS (
- SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- time_slot AS `datetime`,
- AVG(count) AS value
- FROM
- (
- SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- client_id,
- time_slot,
- SUM(count) AS count
- FROM
- dns_failure_src
- GROUP BY
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- time_slot,
- client_id
- )
- GROUP BY
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- `datetime`
- HAVING
- COUNT(*) > 50
-),
--- Oddness: active sessions without DNS_FAILED_LOOKUP_TIME
-dns_no_dns_failure_time AS (
- SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- time_slot AS `datetime`,
- SUM(IF(subsession_length > 0 AND is_empty = 1, 1, 0)) / (
- 1 + SUM(IF(subsession_length > 0, 1, 0))
- ) AS value
- FROM
- (
- SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- client_id,
- time_slot,
- subsession_length,
- empty(dns_fail) AS is_empty
- FROM
- histogram_data_sample
- )
- GROUP BY
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- `datetime`
- HAVING
- COUNT(*) > 50
-),
--- SSL_CERT_VERIFICATION_ERRORS histograms
-ssl_error_prop_src AS (
- SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- time_slot,
- client_id,
- document_id,
- subsession_length,
- sum_values(ssl_cert_errors) AS ssl_sum_vals
- FROM
- histogram_data_sample
-),
-ssl_error_prop AS (
- SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- time_slot AS `datetime`,
- SUM(IF(subsession_length > 0 AND ssl_sum_vals > 0, 1, 0)) / (
- 1 + SUM(IF(subsession_length > 0, 1, 0))
- ) AS value
- FROM
- ssl_error_prop_src
- GROUP BY
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- `datetime`
- HAVING
- COUNT(*) > 50
-),
--- TLS_HANDSHAKE histogram
-tls_handshake_time AS (
- SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- time_slot AS `datetime`,
- EXP(SUM(LOG(key) * count) / SUM(count)) AS value
- FROM
- (
- SELECT
- country,
- city,
- client_id,
- geo_subdivision1,
- geo_subdivision2,
- time_slot,
- key,
- SUM(LEAST(2147483648, value)) AS count
- FROM
- histogram_data_sample
- CROSS JOIN
- UNNEST(histogram_data_sample.tls_handshake)
- GROUP BY
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- time_slot,
- client_id,
- key
- )
- WHERE
- key > 0
- GROUP BY
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- `datetime`
- HAVING
- COUNT(*) > 50
-)
-SELECT
- DAUs.country AS country,
- -- Caveat: mindmind location mapping returns `??` in cases where
- -- the population is less than 15k. As long as the entry has a higher level
- -- grouping (country) we still want to keep it. For backwards compatibility,
- -- we rename this value to `unknown` to represent a lack of value.
- IF(DAUs.city = "??", "unknown", DAUs.city) AS city,
- NULLIF(DAUs.geo_subdivision1, "??") AS geo_subdivision1,
- NULLIF(DAUs.geo_subdivision2, "??") AS geo_subdivision2,
- DAUs.datetime AS `datetime`,
- hd.* EXCEPT (`datetime`, country, city, geo_subdivision1, geo_subdivision2),
- ds.value AS avg_dns_success_time,
- ds_missing.value AS missing_dns_success,
- df.value AS avg_dns_failure_time,
- df_missing.value AS missing_dns_failure,
- dfc.value AS count_dns_failure,
- ssl.value AS ssl_error_prop,
- tls.value AS avg_tls_handshake_time
-FROM
- final_health_data AS hd
--- We apply LEFT JOIN here and in the other places instead
--- of a FULL OUTER JOIN. Since LEFT is DAUs, which should contain
--- all the countries and all the days, it should always have matches
--- with whatever we pass on the RIGHT.
--- When doing a FULL OUTER JOIN, we end up sometimes with nulls on the
--- left because there are a few samples coming from telemetry.main that
--- are not accounted for in `moz-fx-data-shared-prod.telemetry.clients_daily`
-LEFT JOIN
- DAUs
- USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2)
-LEFT JOIN
- dns_success_time AS ds
- USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2)
-LEFT JOIN
- dns_no_dns_lookup_time AS ds_missing
- USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2)
-LEFT JOIN
- dns_failure_time AS df
- USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2)
-LEFT JOIN
- dns_failure_counts AS dfc
- USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2)
-LEFT JOIN
- dns_no_dns_failure_time AS df_missing
- USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2)
-LEFT JOIN
- tls_handshake_time AS tls
- USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2)
-LEFT JOIN
- ssl_error_prop AS ssl
- USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2)
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/schema.yaml 2024-07-26 19:22:08.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/schema.yaml 1970-01-01 00:00:00.000000000 +0000
@@ -1,116 +0,0 @@
-fields:
-- mode: NULLABLE
- name: country
- type: STRING
- description: |
- The Country code of the client.
-
-- mode: NULLABLE
- name: city
- type: STRING
- description: |
- The City name (only for cities with a population >= 15000, 'unknown' otherwise).
-
-- mode: NULLABLE
- name: geo_subdivision1
- type: STRING
- description: |
- Country subdivision (1st level) corresponds, for example, to US states, Canadian provinces,
- French regions, Spanish autonomous communities, and Japanese prefectures. Geo charts do not support FIPS or NUTS codes.
-
-- mode: NULLABLE
- name: geo_subdivision2
- type: STRING
- description: |
- Country subdivision (2nd level) corresponds, for example, to US counties, French departments, and Italian and Spanish provinces.
-
-- mode: NULLABLE
- name: datetime
- type: TIMESTAMP
- description: |
- The date and the time (truncated to hour) the data was submitted by the client.
-
-- mode: NULLABLE
- name: proportion_undefined
- type: FLOAT
- description: |
- The proportion of users who failed to send telemetry for a reason that was not listed in the other cases.
-
-# yamllint disable rule:line-length
-- mode: NULLABLE
- name: proportion_timeout
- type: FLOAT
- description: |
- The proportion of users that had their connection timeout while uploading telemetry
- ([after 90s, in Firefox Desktop](https://searchfox.org/mozilla-central/rev/fa2df28a49883612bd7af4dacd80cdfedcccd2f6/toolkit/components/telemetry/app/TelemetrySend.jsm#81)).
-# yamllint enable rule:line-length
-
-- mode: NULLABLE
- name: proportion_abort
- type: FLOAT
- description: |
- The proportion of users that had their connection terminated by the client
- (for example, terminating open connections before shutting down).
-
-- mode: NULLABLE
- name: proportion_unreachable
- type: FLOAT
- description: |
- The proportion of users that failed to upload telemetry because the server was not reachable
- (e.g. because the host was not reachable, proxy problems or OS waking up after a suspension).
-
-- mode: NULLABLE
- name: proportion_terminated
- type: FLOAT
- description: |
- The proportion of users that had their connection terminated internally by the networking code.
-
-- mode: NULLABLE
- name: proportion_channel_open
- type: FLOAT
- description: |
- The proportion of users for which the upload request was terminated immediately,
- by the client,because of a Necko internal error.
-
-- mode: NULLABLE
- name: avg_dns_success_time
- type: FLOAT
- description: |
- The average time it takes for a successful DNS resolution, in milliseconds.
-
-- mode: NULLABLE
- name: missing_dns_success
- type: FLOAT
- description: |
-
- Counts how many sessions did not report the `DNS_LOOKUP_TIME` histogram.
-
-- mode: NULLABLE
- name: avg_dns_failure_time
- type: FLOAT
- description: |
- The average time it takes for an unsuccessful DNS resolution, in milliseconds.
-
-- mode: NULLABLE
- name: missing_dns_failure
- type: FLOAT
- description: |
- Counts how many sessions did not report the `DNS_FAILED_LOOKUP_TIME` histogram.
-
-- mode: NULLABLE
- name: count_dns_failure
- type: FLOAT
- description: |
- The average count of unsuccessful DNS resolutions reported.
-
-- mode: NULLABLE
- name: ssl_error_prop
- type: FLOAT
- description: |
- The proportion of users that reported an error through the `SSL_CERT_VERIFICATION_ERRORS` histogram.
-
-- mode: NULLABLE
- name: avg_tls_handshake_time
- type: FLOAT
- description: |
- The average time after the TCP SYN to ready for HTTP, in milliseconds.
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/event_monitoring_live_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/event_monitoring_live_v1/metadata.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/event_monitoring_live_v1/metadata.yaml 2024-07-26 19:23:17.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/event_monitoring_live_v1/metadata.yaml 2024-07-26 19:41:12.000000000 +0000
@@ -1,10 +1,19 @@
+friendly_name: Event Monitoring Live
+description: |-
+ Materialized view of experimentation related events
+ coming from monitor_frontend.
+owners:
+- ascholtz@mozilla.com
+- akomar@mozilla.com
+labels:
+ materialized_view: true
+ owner1: ascholtz
+ owner2: akomar
+bigquery: null
workgroup_access:
- role: roles/bigquery.dataViewer
members:
- workgroup:mozilla-confidential
- - workgroup:dataops-managed/external-fides
-
-# Generated by bigquery_etl.dependency
references:
materialized_view.sql:
- moz-fx-data-shared-prod.monitor_frontend_live.events_v1
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/monitor_dashboard_user_journey_funnels_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/monitor_dashboard_user_journey_funnels_v1/metadata.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/monitor_dashboard_user_journey_funnels_v1/metadata.yaml 2024-07-26 19:23:17.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/monitor_dashboard_user_journey_funnels_v1/metadata.yaml 2024-07-26 19:41:12.000000000 +0000
@@ -1,10 +1,26 @@
+friendly_name: Monitor Dashboard User Journey Funnels
+description: |-
+ Please provide a description for the query
+owners:
+- ksiegler@mozilla.org
+labels:
+ incremental: true
+ dag: bqetl_generated_funnels
+ owner1: ksiegler
+scheduling:
+ dag_name: bqetl_generated_funnels
+bigquery:
+ time_partitioning:
+ type: day
+ field: submission_date
+ require_partition_filter: false
+ expiration_days: null
+ range_partitioning: null
+ clustering: null
workgroup_access:
- role: roles/bigquery.dataViewer
members:
- workgroup:mozilla-confidential
- - workgroup:dataops-managed/external-fides
-
-# Generated by bigquery_etl.dependency
references:
query.sql:
- mozdata.monitor_frontend.events_unnested
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/org_mozilla_fenix/geckoview_version/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/org_mozilla_fenix/geckoview_version/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/org_mozilla_fenix/geckoview_version/schema.yaml 2024-07-26 19:22:07.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/org_mozilla_fenix/geckoview_version/schema.yaml 2024-07-26 19:31:49.000000000 +0000
@@ -1,7 +1,13 @@
fields:
-- type: DATETIME
- name: build_hour
-- type: INTEGER
- name: geckoview_major_version
-- type: INTEGER
- name: n_pings
+- name: build_hour
+ type: DATETIME
+ mode: NULLABLE
+ description: null
+- name: geckoview_major_version
+ type: INTEGER
+ mode: NULLABLE
+ description: null
+- name: n_pings
+ type: INTEGER
+ mode: NULLABLE
+ description: null
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_derived/mobile_search_clients_daily_v2/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/search_derived/mobile_search_clients_daily_
```
Integration report for "Add broken UDF for testing"
sql.diff
Click to expand!
```diff
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_generated_funnels.py /tmp/workspace/generated-sql/dags/bqetl_generated_funnels.py
--- /tmp/workspace/main-generated-sql/dags/bqetl_generated_funnels.py 2024-07-26 20:06:50.000000000 +0000
+++ /tmp/workspace/generated-sql/dags/bqetl_generated_funnels.py 2024-07-26 20:26:02.000000000 +0000
@@ -276,6 +276,21 @@
)
)
+ monitor_frontend_derived__monitor_dashboard_user_journey_funnels__v1 = bigquery_etl_query(
+ task_id="monitor_frontend_derived__monitor_dashboard_user_journey_funnels__v1",
+ destination_table="monitor_dashboard_user_journey_funnels_v1",
+ dataset_id="monitor_frontend_derived",
+ project_id="moz-fx-data-shared-prod",
+ owner="ksiegler@mozilla.org",
+ email=[
+ "ascholtz@mozilla.com",
+ "ksiegler@mozilla.org",
+ "telemetry-alerts@mozilla.com",
+ ],
+ date_partition_parameter="submission_date",
+ depends_on_past=False,
+ )
+
accounts_frontend_derived__email_first_reg_login_funnels__v1.set_upstream(
wait_for_copy_deduplicate_all
)
@@ -325,3 +340,7 @@
firefox_accounts_derived__registration_funnels_legacy_events__v1.set_upstream(
wait_for_firefox_accounts_derived__fxa_stdout_events__v1
)
+
+ monitor_frontend_derived__monitor_dashboard_user_journey_funnels__v1.set_upstream(
+ wait_for_copy_deduplicate_all
+ )
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_internet_outages.py /tmp/workspace/generated-sql/dags/bqetl_internet_outages.py
--- /tmp/workspace/main-generated-sql/dags/bqetl_internet_outages.py 2024-07-26 20:06:50.000000000 +0000
+++ /tmp/workspace/generated-sql/dags/bqetl_internet_outages.py 2024-07-26 20:25:57.000000000 +0000
@@ -88,12 +88,12 @@
pool="DATA_ENG_EXTERNALTASKSENSOR",
)
- checks__warn_internet_outages__global_outages__v1 = bigquery_dq_check(
- task_id="checks__warn_internet_outages__global_outages__v1",
+ checks__fail_internet_outages__global_outages__v1 = bigquery_dq_check(
+ task_id="checks__fail_internet_outages__global_outages__v1",
source_table="global_outages_v1",
dataset_id="internet_outages",
project_id="moz-fx-data-shared-prod",
- is_dq_check_fail=False,
+ is_dq_check_fail=True,
owner="aplacitelli@mozilla.com",
email=["aplacitelli@mozilla.com"],
depends_on_past=False,
@@ -101,9 +101,9 @@
retries=0,
)
- checks__warn_internet_outages__global_outages__v2 = bigquery_dq_check(
- task_id="checks__warn_internet_outages__global_outages__v2",
- source_table="global_outages_v2",
+ checks__warn_internet_outages__global_outages__v1 = bigquery_dq_check(
+ task_id="checks__warn_internet_outages__global_outages__v1",
+ source_table="global_outages_v1",
dataset_id="internet_outages",
project_id="moz-fx-data-shared-prod",
is_dq_check_fail=False,
@@ -125,25 +125,14 @@
depends_on_past=False,
)
- internet_outages__global_outages__v2 = bigquery_etl_query(
- task_id="internet_outages__global_outages__v2",
- destination_table="global_outages_v2",
- dataset_id="internet_outages",
- project_id="moz-fx-data-shared-prod",
- owner="aplacitelli@mozilla.com",
- email=["aplacitelli@mozilla.com"],
- date_partition_parameter="submission_date",
- depends_on_past=False,
+ checks__fail_internet_outages__global_outages__v1.set_upstream(
+ internet_outages__global_outages__v1
)
checks__warn_internet_outages__global_outages__v1.set_upstream(
internet_outages__global_outages__v1
)
- checks__warn_internet_outages__global_outages__v2.set_upstream(
- internet_outages__global_outages__v2
- )
-
internet_outages__global_outages__v1.set_upstream(wait_for_copy_deduplicate_all)
internet_outages__global_outages__v1.set_upstream(
@@ -153,13 +142,3 @@
internet_outages__global_outages__v1.set_upstream(
wait_for_telemetry_derived__clients_daily_joined__v1
)
-
- internet_outages__global_outages__v2.set_upstream(wait_for_copy_deduplicate_all)
-
- internet_outages__global_outages__v2.set_upstream(
- wait_for_copy_deduplicate_main_ping
- )
-
- internet_outages__global_outages__v2.set_upstream(
- wait_for_telemetry_derived__clients_daily_joined__v1
- )
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_mobile_search.py /tmp/workspace/generated-sql/dags/bqetl_mobile_search.py
--- /tmp/workspace/main-generated-sql/dags/bqetl_mobile_search.py 2024-07-26 20:06:50.000000000 +0000
+++ /tmp/workspace/generated-sql/dags/bqetl_mobile_search.py 2024-07-26 20:25:58.000000000 +0000
@@ -158,23 +158,6 @@
search_derived__mobile_search_clients_daily__v1
)
- search_derived__mobile_search_clients_daily__v2 = bigquery_etl_query(
- task_id="search_derived__mobile_search_clients_daily__v2",
- destination_table="mobile_search_clients_daily_v2",
- dataset_id="search_derived",
- project_id="moz-fx-data-shared-prod",
- owner="cmorales@mozilla.com",
- email=[
- "akomar@mozilla.com",
- "akommasani@mozilla.com",
- "anicholson@mozilla.com",
- "cmorales@mozilla.com",
- "telemetry-alerts@mozilla.com",
- ],
- date_partition_parameter="submission_date",
- depends_on_past=False,
- )
-
search_derived__mobile_search_clients_last_seen__v1 = bigquery_etl_query(
task_id="search_derived__mobile_search_clients_last_seen__v1",
destination_table="mobile_search_clients_last_seen_v1",
@@ -199,10 +182,6 @@
wait_for_copy_deduplicate_all
)
- search_derived__mobile_search_clients_daily__v2.set_upstream(
- wait_for_copy_deduplicate_all
- )
-
search_derived__mobile_search_clients_last_seen__v1.set_upstream(
search_derived__mobile_search_clients_daily__v1
)
Only in /tmp/workspace/generated-sql/sql/mozfun/addons: is_adblocker_broken
Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages: global_outages
Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages: global_outages_v2
Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_derived: mobile_search_clients_daily_v2
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/metadata.yaml /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/metadata.yaml
--- /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/metadata.yaml 1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/metadata.yaml 2024-07-26 20:01:38.000000000 +0000
@@ -0,0 +1,2 @@
+description: Returns whether a given Addon ID is an adblocker.
+friendly_name: Is Adblocker
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/README.md /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/README.md
--- /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/README.md 1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/README.md 2024-07-26 20:01:38.000000000 +0000
@@ -0,0 +1,16 @@
+Determine if a given Addon ID is for an adblocker.
+
+As an example, this query will give the number of users who
+have an adblocker installed.
+```
+SELECT
+ submission_date,
+ COUNT(DISTINCT client_id) AS dau,
+FROM
+ mozdata.telemetry.addons
+WHERE
+ mozfun.addons.is_adblocker(addon_id)
+ AND submission_date >= "2023-01-01"
+GROUP BY
+ submission_date
+```
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/udf.sql /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/udf.sql
--- /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/udf.sql 1970-01-01 00:00:00.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/udf.sql 2024-07-26 20:01:38.000000000 +0000
@@ -0,0 +1,19 @@
+CREATE OR REPLACE FUNCTION addons.is_adblocker_broken(addon_id STRING)
+RETURNS BOOLEAN AS (
+ addon_id IS NOT NULL
+ AND addon_id IN (
+ 'uBlock0@raymondhill.net', -- 'uBlock_Origin'
+ '{d10d0bf8-f5b5-c8b4-a8b2-2b9879e08c5d}', -- 'AdBlock_Plus'
+ 'jid1-NIfFY2CA8fy1tg@jetpack', -- 'Adblock'
+ 'firefox@ghostery.com', -- 'Ghostery'
+ 'adblockultimate@adblockultimate.net' -- 'AdBlock_Ultimate'
+ )
+);
+
+-- Tests
+SELECT
+ assert.true(addons.is_adblocker_broken('borken')),
+ assert.true(addons.is_adblocker_broken('adblockultimate@adblockultimate.net')),
+ assert.false(addons.is_adblocker_broken('failing test')),
+ assert.false(addons.is_adblocker_broken('')),
+ assert.false(addons.is_adblocker_broken(NULL)),
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml 2024-07-26 20:00:49.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml 2024-07-26 20:10:32.000000000 +0000
@@ -1,49 +1,49 @@
fields:
-- mode: NULLABLE
- name: submission_date
+- name: submission_date
type: DATE
-- mode: NULLABLE
- name: source
+ mode: NULLABLE
+- name: source
type: STRING
-- mode: NULLABLE
- name: event_type
+ mode: NULLABLE
+- name: event_type
type: STRING
-- mode: NULLABLE
- name: form_factor
+ mode: NULLABLE
+- name: form_factor
type: STRING
-- mode: NULLABLE
- name: country
+ mode: NULLABLE
+- name: country
type: STRING
-- mode: NULLABLE
- name: subdivision1
+ mode: NULLABLE
+- name: subdivision1
type: STRING
-- mode: NULLABLE
- name: advertiser
+ mode: NULLABLE
+- name: advertiser
type: STRING
-- mode: NULLABLE
- name: release_channel
+ mode: NULLABLE
+- name: release_channel
type: STRING
-- mode: NULLABLE
- name: position
+ mode: NULLABLE
+- name: position
type: INTEGER
-- mode: NULLABLE
- name: provider
+ mode: NULLABLE
+- name: provider
type: STRING
-- mode: NULLABLE
- name: match_type
+ mode: NULLABLE
+- name: match_type
type: STRING
-- mode: NULLABLE
- name: normalized_os
+ mode: NULLABLE
+- name: normalized_os
type: STRING
-- mode: NULLABLE
- name: suggest_data_sharing_enabled
+ mode: NULLABLE
+- name: suggest_data_sharing_enabled
type: BOOLEAN
-- mode: NULLABLE
- name: event_count
+ mode: NULLABLE
+- name: event_count
type: INTEGER
-- mode: NULLABLE
- name: user_count
+ mode: NULLABLE
+- name: user_count
type: INTEGER
-- mode: NULLABLE
- name: query_type
+ mode: NULLABLE
+- name: query_type
type: STRING
+ mode: NULLABLE
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml 2024-07-26 20:00:49.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml 2024-07-26 20:10:32.000000000 +0000
@@ -1,40 +1,40 @@
fields:
-- mode: NULLABLE
- name: submission_date
+- name: submission_date
type: DATE
-- mode: NULLABLE
- name: form_factor
+ mode: NULLABLE
+- name: form_factor
type: STRING
-- mode: NULLABLE
- name: country
+ mode: NULLABLE
+- name: country
type: STRING
-- mode: NULLABLE
- name: advertiser
+ mode: NULLABLE
+- name: advertiser
type: STRING
-- mode: NULLABLE
- name: normalized_os
+ mode: NULLABLE
+- name: normalized_os
type: STRING
-- mode: NULLABLE
- name: release_channel
+ mode: NULLABLE
+- name: release_channel
type: STRING
-- mode: NULLABLE
- name: position
+ mode: NULLABLE
+- name: position
type: INTEGER
-- mode: NULLABLE
- name: provider
+ mode: NULLABLE
+- name: provider
type: STRING
-- mode: NULLABLE
- name: match_type
+ mode: NULLABLE
+- name: match_type
type: STRING
-- mode: NULLABLE
- name: suggest_data_sharing_enabled
+ mode: NULLABLE
+- name: suggest_data_sharing_enabled
type: BOOLEAN
-- mode: NULLABLE
- name: impression_count
+ mode: NULLABLE
+- name: impression_count
type: INTEGER
-- mode: NULLABLE
- name: click_count
+ mode: NULLABLE
+- name: click_count
type: INTEGER
-- mode: NULLABLE
- name: query_type
+ mode: NULLABLE
+- name: query_type
type: STRING
+ mode: NULLABLE
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml 2024-07-26 20:00:49.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml 2024-07-26 20:11:42.000000000 +0000
@@ -26,6 +26,9 @@
- name: adjust_network
type: STRING
mode: NULLABLE
+- name: install_source
+ type: STRING
+ mode: NULLABLE
- name: retained_week_2
type: BOOLEAN
mode: NULLABLE
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml 2024-07-26 20:00:49.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml 2024-07-26 20:11:43.000000000 +0000
@@ -48,6 +48,10 @@
description: 'The type of source of a client installation.
'
+- name: install_source
+ type: STRING
+ mode: NULLABLE
+ description: null
- name: new_profiles
type: INTEGER
mode: NULLABLE
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml 2024-07-26 20:00:49.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml 2024-07-26 20:10:47.000000000 +0000
@@ -6,9 +6,10 @@
- name: site_url
type: STRING
mode: NULLABLE
- description: |-
- For domain properties, this will be `sc-domain:` followed by the domain name.
- For URL-prefix properties, it will be the full URL of the property definition.
+ description: 'For domain properties, this will be `sc-domain:` followed by the domain
+ name.
+
+ For URL-prefix properties, it will be the full URL of the property definition.'
- name: site_domain_name
type: STRING
mode: NULLABLE
@@ -16,103 +17,110 @@
- name: page_url
type: STRING
mode: NULLABLE
- description: |-
- The final page URL linked by a search result after any skip redirects.
- This will be null for anonymized Discover impressions.
+ description: 'The final page URL linked by a search result after any skip redirects.
+
+ This will be null for anonymized Discover impressions.'
- name: page_domain_name
type: STRING
mode: NULLABLE
- description: |-
- Domain name of the page URL.
- This will be null for anonymized Discover impressions.
+ description: 'Domain name of the page URL.
+
+ This will be null for anonymized Discover impressions.'
- name: page_path
type: STRING
mode: NULLABLE
- description: |-
- The path part of the page URL.
- This will be null for anonymized Discover impressions.
+ description: 'The path part of the page URL.
+
+ This will be null for anonymized Discover impressions.'
- name: localized_site_code
type: STRING
mode: NULLABLE
- description: |-
- Localized site code such as `en-US` or `de` found in the first segment of the page URL path (if any).
- This will be null for anonymized Discover impressions.
+ description: 'Localized site code such as `en-US` or `de` found in the first segment
+ of the page URL path (if any).
+
+ This will be null for anonymized Discover impressions.'
- name: localized_site
type: STRING
mode: NULLABLE
- description: |-
- Description of the localized site language and/or country based on `localized_site_code` (if any).
- This will be null for anonymized Discover impressions.
+ description: 'Description of the localized site language and/or country based on
+ `localized_site_code` (if any).
+
+ This will be null for anonymized Discover impressions.'
- name: localized_site_language_code
type: STRING
mode: NULLABLE
- description: |-
- Localized site language code in ISO-639-alpha-2 format found in the first segment of the page URL path (if any).
- This will be null for anonymized Discover impressions.
+ description: 'Localized site language code in ISO-639-alpha-2 format found in the
+ first segment of the page URL path (if any).
+
+ This will be null for anonymized Discover impressions.'
- name: localized_site_language
type: STRING
mode: NULLABLE
- description: |-
- Localized site language based on `localized_site_language_code` (if any).
- This will be null for anonymized Discover impressions.
+ description: 'Localized site language based on `localized_site_language_code` (if
+ any).
+
+ This will be null for anonymized Discover impressions.'
- name: query
type: STRING
mode: NULLABLE
- description: |-
- The search query.
- This will be null for anonymized search impressions, and all Discover and Google News search impressions.
+ description: 'The search query.
+
+ This will be null for anonymized search impressions, and all Discover and Google
+ News search impressions.'
- name: query_type
type: STRING
mode: NULLABLE
- description: |-
- Type of search query:
- * Anonymized: Query was redacted by Google to protect the users' privacy.
- * Brand: Query contained one or more Mozilla brand keywords.
- * Non-Brand: Query didn't contain any Mozilla brand keywords.
- * Unknown: Query couldn't be classified.
- This will be null for all Discover and Google News search impressions.
+ description: "Type of search query:\n * Anonymized: Query was redacted by Google\
+ \ to protect the users' privacy.\n * Brand: Query contained one or more Mozilla\
+ \ brand keywords.\n * Non-Brand: Query didn't contain any Mozilla brand keywords.\n\
+ \ * Unknown: Query couldn't be classified.\nThis will be null for all Discover\
+ \ and Google News search impressions."
- name: is_anonymized
type: BOOLEAN
mode: NULLABLE
- description: |-
- Whether Google has anonymized the search impression to protect the users' privacy.
+ description: 'Whether Google has anonymized the search impression to protect the
+ users'' privacy.
+
The `query` field will be null for anonymized search impressions.
- The `country_code`, `page_url`, and related fields will be null for anonymized Discover impressions.
+
+ The `country_code`, `page_url`, and related fields will be null for anonymized
+ Discover impressions.'
- name: has_good_page_experience
type: BOOLEAN
mode: NULLABLE
- description: |-
- Whether Google Search considers the page to be providing a good page experience.
- This will be null when the source data wasn't exported directly to BigQuery by Google.
+ description: 'Whether Google Search considers the page to be providing a good page
+ experience.
+
+ This will be null when the source data wasn''t exported directly to BigQuery by
+ Google.'
- name: search_type
type: STRING
mode: NULLABLE
- description: |-
- Where the link was seen by the user:
- * Web: In Google Search's default "All" tab.
- * Image: In Google Search's "Images" tab.
- * Video: In Google Search's "Videos" tab.
- * News: In Google Search's "News" tab.
- * Discover: In Google's Discover feed.
- * Google News: On news.google.com or in the Google News app on Android and iOS.
+ description: "Where the link was seen by the user:\n * Web: In Google Search's\
+ \ default \"All\" tab.\n * Image: In Google Search's \"Images\" tab.\n * Video:\
+ \ In Google Search's \"Videos\" tab.\n * News: In Google Search's \"News\" tab.\n\
+ \ * Discover: In Google's Discover feed.\n * Google News: On news.google.com\
+ \ or in the Google News app on Android and iOS."
- name: search_appearance
type: STRING
mode: NULLABLE
- description: |-
- How the search result appeared (e.g. normal result, translated result, video).
- This will be null when the source data wasn't exported directly to BigQuery by Google.
+ description: 'How the search result appeared (e.g. normal result, translated result,
+ video).
+
+ This will be null when the source data wasn''t exported directly to BigQuery by
+ Google.'
- name: user_country_code
type: STRING
mode: NULLABLE
- description: |-
- Country from which the user was searching, in ISO-3166-1-alpha-3 format.
- This will be null for anonymized Discover impressions.
+ description: 'Country from which the user was searching, in ISO-3166-1-alpha-3 format.
+
+ This will be null for anonymized Discover impressions.'
- name: user_country
type: STRING
mode: NULLABLE
- description: |-
- Country from which the user was searching.
- This will be null for anonymized Discover impressions.
+ description: 'Country from which the user was searching.
+
+ This will be null for anonymized Discover impressions.'
- name: user_region
type: STRING
mode: NULLABLE
@@ -124,13 +132,15 @@
- name: device_type
type: STRING
mode: NULLABLE
- description: |-
- The type of device on which the user was searching: Desktop, Mobile, or Tablet.
- This will be null for Discover impressions.
+ description: 'The type of device on which the user was searching: Desktop, Mobile,
+ or Tablet.
+
+ This will be null for Discover impressions.'
- name: impressions
type: INTEGER
mode: NULLABLE
- description: The number of times that search results with a link to the page were shown to a user.
+ description: The number of times that search results with a link to the page were
+ shown to a user.
- name: clicks
type: INTEGER
mode: NULLABLE
@@ -138,6 +148,7 @@
- name: average_position
type: FLOAT
mode: NULLABLE
- description: |-
- The average position of the page in the search results, where `1` is the topmost position.
- This will be null for Discover and Google News search impressions.
+ description: 'The average position of the page in the search results, where `1`
+ is the topmost position.
+
+ This will be null for Discover and Google News search impressions.'
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/schema.yaml 2024-07-26 20:00:49.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/schema.yaml 2024-07-26 20:10:48.000000000 +0000
@@ -6,9 +6,10 @@
- name: site_url
type: STRING
mode: NULLABLE
- description: |-
- For domain properties, this will be `sc-domain:` followed by the domain name.
- For URL-prefix properties, it will be the full URL of the property definition.
+ description: 'For domain properties, this will be `sc-domain:` followed by the domain
+ name.
+
+ For URL-prefix properties, it will be the full URL of the property definition.'
- name: site_domain_name
type: STRING
mode: NULLABLE
@@ -20,27 +21,23 @@
- name: query_type
type: STRING
mode: NULLABLE
- description: |-
- Type of search query:
- * Anonymized: Query was redacted by Google to protect the users' privacy.
- * Brand: Query contained one or more Mozilla brand keywords.
- * Non-Brand: Query didn't contain any Mozilla brand keywords.
- * Unknown: Query couldn't be classified.
+ description: "Type of search query:\n * Anonymized: Query was redacted by Google\
+ \ to protect the users' privacy.\n * Brand: Query contained one or more Mozilla\
+ \ brand keywords.\n * Non-Brand: Query didn't contain any Mozilla brand keywords.\n\
+ \ * Unknown: Query couldn't be classified."
- name: is_anonymized
type: BOOLEAN
mode: NULLABLE
- description: |-
- Whether Google has anonymized the search impression to protect the users' privacy.
- The `query` field will be null for anonymized search impressions.
+ description: 'Whether Google has anonymized the search impression to protect the
+ users'' privacy.
+
+ The `query` field will be null for anonymized search impressions.'
- name: search_type
type: STRING
mode: NULLABLE
- description: |-
- Where the link was seen by the user:
- * Web: In Google Search's default "All" tab.
- * Image: In Google Search's "Images" tab.
- * Video: In Google Search's "Videos" tab.
- * News: In Google Search's "News" tab.
+ description: "Where the link was seen by the user:\n * Web: In Google Search's\
+ \ default \"All\" tab.\n * Image: In Google Search's \"Images\" tab.\n * Video:\
+ \ In Google Search's \"Videos\" tab.\n * News: In Google Search's \"News\" tab."
- name: user_country_code
type: STRING
mode: NULLABLE
@@ -60,17 +57,20 @@
- name: device_type
type: STRING
mode: NULLABLE
- description: |-
- The type of device on which the user was searching: Desktop, Mobile, or Tablet.
+ description: 'The type of device on which the user was searching: Desktop, Mobile,
+ or Tablet.'
- name: impressions
type: INTEGER
mode: NULLABLE
- description: The number of times that search results with at least one link to the site were shown to a user.
+ description: The number of times that search results with at least one link to the
+ site were shown to a user.
- name: clicks
type: INTEGER
mode: NULLABLE
- description: The number of times a user clicked at least one search result link to the site.
+ description: The number of times a user clicked at least one search result link
+ to the site.
- name: average_top_position
type: FLOAT
mode: NULLABLE
- description: The average top position of the site in the search results, where `1` is the topmost position.
+ description: The average top position of the site in the search results, where `1`
+ is the topmost position.
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/metadata.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/metadata.yaml 2024-07-26 20:01:25.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/metadata.yaml 1970-01-01 00:00:00.000000000 +0000
@@ -1,15 +0,0 @@
-friendly_name: Global Outages
-description: |-
- Please provide a description for the query
-owners: []
-labels: {}
-bigquery: null
-workgroup_access:
-- role: roles/bigquery.dataViewer
- members:
- - workgroup:mozilla-confidential
- - workgroup:internet-outages/external
-references:
- view.sql:
- - moz-fx-data-shared-prod.internet_outages.global_outages_v1
- - moz-fx-data-shared-prod.internet_outages.global_outages_v2
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/schema.yaml 2024-07-26 20:01:25.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/schema.yaml 1970-01-01 00:00:00.000000000 +0000
@@ -1,58 +0,0 @@
-fields:
-- name: country
- type: STRING
- mode: NULLABLE
-- name: city
- type: STRING
- mode: NULLABLE
-- name: geo_subdivision1
- type: STRING
- mode: NULLABLE
-- name: geo_subdivision2
- type: STRING
- mode: NULLABLE
-- name: datetime
- type: TIMESTAMP
- mode: NULLABLE
-- name: proportion_undefined
- type: FLOAT
- mode: NULLABLE
-- name: proportion_timeout
- type: FLOAT
- mode: NULLABLE
-- name: proportion_abort
- type: FLOAT
- mode: NULLABLE
-- name: proportion_unreachable
- type: FLOAT
- mode: NULLABLE
-- name: proportion_terminated
- type: FLOAT
- mode: NULLABLE
-- name: proportion_channel_open
- type: FLOAT
- mode: NULLABLE
-- name: avg_dns_success_time
- type: FLOAT
- mode: NULLABLE
-- name: missing_dns_success
- type: FLOAT
- mode: NULLABLE
-- name: avg_dns_failure_time
- type: FLOAT
- mode: NULLABLE
-- name: missing_dns_failure
- type: FLOAT
- mode: NULLABLE
-- name: count_dns_failure
- type: FLOAT
- mode: NULLABLE
-- name: ssl_error_prop
- type: FLOAT
- mode: NULLABLE
-- name: avg_tls_handshake_time
- type: FLOAT
- mode: NULLABLE
-- name: dataset_version
- type: STRING
- mode: NULLABLE
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/view.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/view.sql 2024-07-26 20:00:50.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/view.sql 1970-01-01 00:00:00.000000000 +0000
@@ -1,52 +0,0 @@
-CREATE OR REPLACE VIEW
- `moz-fx-data-shared-prod.internet_outages.global_outages`
-AS
-SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- `datetime`,
- proportion_undefined,
- proportion_timeout,
- proportion_abort,
- proportion_unreachable,
- proportion_terminated,
- proportion_channel_open,
- avg_dns_success_time,
- missing_dns_success,
- avg_dns_failure_time,
- missing_dns_failure,
- count_dns_failure,
- ssl_error_prop,
- avg_tls_handshake_time,
- "v2" AS dataset_version,
-FROM
- `moz-fx-data-shared-prod.internet_outages.global_outages_v2`
-WHERE
- DATE(`datetime`) >= "2024-07-26"
-UNION ALL
-SELECT
- country,
- city,
- CAST(NULL AS STRING) AS geo_subdivision1,
- CAST(NULL AS STRING) AS geo_subdivision2,
- `datetime`,
- proportion_undefined,
- proportion_timeout,
- proportion_abort,
- proportion_unreachable,
- proportion_terminated,
- proportion_channel_open,
- avg_dns_success_time,
- missing_dns_success,
- avg_dns_failure_time,
- missing_dns_failure,
- count_dns_failure,
- ssl_error_prop,
- avg_tls_handshake_time,
- "v1" AS dataset_version,
-FROM
- `moz-fx-data-shared-prod.internet_outages.global_outages_v1`
-WHERE
- DATE(`datetime`) < "2024-07-26"
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/checks.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/checks.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/checks.sql 2024-07-26 20:00:50.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/checks.sql 2024-07-26 20:01:38.000000000 +0000
@@ -1,10 +1,10 @@
-#warn
+#fail
{{ min_row_count(1000, where="DATE(datetime) = @submission_date") }}
-#warn
+#fail
{{ is_unique(columns=["datetime", "city", "country"], where="DATE(`datetime`) = @submission_date") }}
-#warn
+#fail
/*
This statement used to contain the following fields,
but these are sometimes missing from country/city combinations
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/metadata.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/metadata.yaml 2024-07-26 20:02:42.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/metadata.yaml 2024-07-26 20:20:57.000000000 +0000
@@ -1,4 +1,4 @@
-friendly_name: Internet Outages (Version 1)
+friendly_name: Internet Outages
description: |-
This contains a set aggregated metrics that correlate to internet
outages for different countries in the world.
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/checks.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/checks.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/checks.sql 2024-07-26 20:00:50.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/checks.sql 1970-01-01 00:00:00.000000000 +0000
@@ -1,38 +0,0 @@
-{% set _WHERE = '@submission_date = @submission_date' %}
-
-#warn
-{{ min_row_count(1000, where=_WHERE) }}
-
-#warn
-{{ is_unique(columns=["country", "city", "geo_subdivision1", "geo_subdivision2", "datetime"], where=_WHERE) }}
-
-#warn
-/*
- This statement used to contain the following fields,
- but these are sometimes missing from country/city combinations
- See https://sql.telemetry.mozilla.org/queries/96541/source
- and bug 1868674
-
- "avg_tls_handshake_time"
- "count_dns_failure"
-*/
-{{ not_null(columns=[
- "country",
- "city",
- "datetime",
- "proportion_undefined",
- "proportion_timeout",
- "proportion_abort",
- "proportion_unreachable",
- "proportion_terminated",
- "proportion_channel_open",
- "avg_dns_success_time",
- "missing_dns_success",
- "avg_dns_failure_time",
- "missing_dns_failure",
- "ssl_error_prop",
-
-], where=_WHERE) }}
-
-#warn
-{{ value_length(column="country", expected_length=2, where=_WHERE) }}
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/metadata.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/metadata.yaml 2024-07-26 20:02:42.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/metadata.yaml 1970-01-01 00:00:00.000000000 +0000
@@ -1,98 +0,0 @@
-friendly_name: Internet Outages (version 2)
-description: |-
- Note: Version 2 contains two additional fields: geo_subdivision1 && geo_subdivision2 for more accurate data grouping
- and the data available starting 2024/07/26.
-
- This contains a set aggregated metrics that correlate to internet
- outages for different countries in the world.
-
- The dataset contains the following fields:
- - `country`:
- the Country code of the client.
- - `city`:
- the City name (only for cities with a population >= 15000, 'unknown' otherwise).
- - `geo_subdivision1`:
- Country subdivision (1st level) corresponds, for example, to US states, Canadian provinces,
- French regions, Spanish autonomous communities, and Japanese prefectures. Geo charts do not support FIPS or NUTS codes.
- - `geo_subdivision2`:
- Country subdivision (2nd level) corresponds, for example, to US counties, French departments, and Italian and Spanish provinces.
- - `datetime`:
- the date and the time (truncated to hour) the data was submitted by the client.
- - `proportion_undefined`:
- the proportion of users who failed to send telemetry for a reason that was not listed in the other cases.
- - `proportion_timeout`:
- the proportion of users that had their connection timeout while uploading telemetry
- ([after 90s, in Firefox Desktop](https://searchfox.org/mozilla-central/rev/fa2df28a49883612bd7af4dacd80cdfedcccd2f6/toolkit/components/telemetry/app/TelemetrySend.jsm#81)).
- - `proportion_abort`:
- the proportion of users that had their connection terminated by the client (for example, terminating open connections before shutting down).
- - `proportion_unreachable`:
- the proportion of users that failed to upload telemetry because the server was not reachable
- (e.g. because the host was not reachable, proxy problems or OS waking up after a suspension).
- - `proportion_terminated`:
- the proportion of users that had their connection terminated internally by the networking code.
- - `proportion_channel_open`:
- the proportion of users for which the upload request was terminated immediately, by the client, because of a Necko internal error.
- - `avg_dns_success_time`:
- the average time it takes for a successful DNS resolution, in milliseconds.
- - `missing_dns_success`:
- counts how many sessions did not report the `DNS_LOOKUP_TIME` histogram.
- - `avg_dns_failure_time`:
- the average time it takes for an unsuccessful DNS resolution, in milliseconds.
- - `missing_dns_failure`:
- counts how many sessions did not report the `DNS_FAILED_LOOKUP_TIME` histogram.
- - `count_dns_failure`:
- the average count of unsuccessful DNS resolutions reported.
- - `ssl_error_prop`:
- the proportion of users that reported an error through the `SSL_CERT_VERIFICATION_ERRORS` histogram.
- - `avg_tls_handshake_time`:
- the average time after the TCP SYN to ready for HTTP, in milliseconds.
-
- Caveats with the data:
- As with any observational data, there are many caveats and interpretation must
- be done carefully. Below is a list of issues we have considered, but it is not
- exhaustive.
- - Firefox users are not representative of the general population in their
- region.
- - Users can experience multiple types of failures and so the proportions
- are not summable. For example, if 2.4% of clients had a timeout and 2.6% of
- clients had eUnreachable that doesn't necessarily mean that 5.0% of clients
- had a timeout or a eUnreachable
- - Geographical data is based on IPGeo databases. These databases are
- imperfect, so some activity may be attributed to the wrong location.
- Further, proxy and VPN usage can create geo-attribution errors.
- - Data contains duplicates prior to mid-2022. See [this bug](https://bugzilla.mozilla.org/show_bug.cgi?id=1753470)
- for further information.
-owners:
-- aplacitelli@mozilla.com
-labels:
- incremental: true
- review_bugs:
- - '1640204'
- dag: bqetl_internet_outages
- owner1: aplacitelli
-scheduling:
- dag_name: bqetl_internet_outages
-bigquery:
- time_partitioning:
- type: day
- field: datetime
- require_partition_filter: false
- expiration_days: null
- range_partitioning: null
- clustering:
- fields:
- - country
- - city
-workgroup_access:
-- role: roles/bigquery.dataViewer
- members:
- - workgroup:mozilla-confidential
- - workgroup:internet-outages/external
-references:
- checks.sql:
- - ..
- query.sql:
- - C
- - moz-fx-data-shared-prod.telemetry.clients_daily
- - moz-fx-data-shared-prod.telemetry.health
- - moz-fx-data-shared-prod.telemetry_stable.main_v5
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/query.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/query.sql 2024-07-26 20:00:50.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/query.sql 1970-01-01 00:00:00.000000000 +0000
@@ -1,557 +0,0 @@
--- This sums the values reported by an histogram.
-CREATE TEMP FUNCTION sum_values(x ARRAY>) AS (
- (
- WITH a AS (
- SELECT
- IF(ARRAY_LENGTH(x) > 0, 1, 0) AS isPres1
- ),
- b AS (
- SELECT
- -- Truncate pathological values that are beyond the documented limits per
- -- https://firefox-source-docs.mozilla.org/toolkit/components/telemetry/collection/histograms.html#histogram-values
- SUM(LEAST(2147483648, value)) AS t
- FROM
- UNNEST(x)
- WHERE
- key > 0
- )
- SELECT
- COALESCE(isPres1 * t, 0)
- FROM
- a,
- b
- )
-);
-
--- This counts how many times an histogram is not present.
--- It checks if the histogram is present at all and whether or not it recorded
--- any non-zero value.
-CREATE TEMP FUNCTION empty(x ARRAY>) AS (
- (
- WITH a AS (
- SELECT
- IF(ARRAY_LENGTH(x) = 0, 1, 0) AS isEmpty1
- ),
- b AS (
- SELECT
- IF(MAX(value) = 0, 1, 0) isEmpty2
- FROM
- UNNEST(x)
- ),
- c AS (
- SELECT
- IF(isEmpty2 = 1 OR isEmpty1 = 1, 1, 0) AS Empty
- FROM
- a,
- b
- )
- SELECT
- Empty
- FROM
- C
- )
-);
-
--- Get a stable source for DAUs.
-WITH DAUs AS (
- SELECT
- `moz-fx-data-shared-prod.udf.geo_struct_set_defaults`(
- country,
- city,
- geo_subdivision1,
- geo_subdivision2
- ).*,
- -- Truncate the submission timestamp to the hour. Note that this filed was
- -- introduced on the 16th December 2019, so it will be `null` for queries
- -- before that day. See https://github.com/mozilla/bigquery-etl/pull/603 .
- TIMESTAMP_TRUNC(submission_timestamp_min, HOUR) AS `datetime`,
- COUNT(*) AS client_count
- FROM
- `moz-fx-data-shared-prod.telemetry.clients_daily`
- WHERE
- submission_date = @submission_date
- -- Country can be null or `??` if geoip lookup failed.
- -- There's no point in adding these to the analyses.
- -- Due to a bug in `telemetry.clients_daily` we need to
- -- check for '??' as well in addition to null.
- GROUP BY
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- `datetime`
- -- Filter filter out cities for which we have less than or equal to
- -- 50 hourly active users. This will make sure data won't end up in
- -- the final table.
- HAVING
- country <> "??"
- AND client_count > 50
-),
--- Compute aggregates for the health data.
-health_data_sample AS (
- SELECT
- `moz-fx-data-shared-prod.udf.geo_struct_set_defaults`(
- metadata.geo.country,
- metadata.geo.city,
- metadata.geo.subdivision1, -- returned field gets renamed to geo_subdivision1
- metadata.geo.subdivision2 -- returned field gets renamed to geo_subdivision2
- ).*,
- TIMESTAMP_TRUNC(submission_timestamp, HOUR) AS `datetime`,
- client_id,
- SUM(
- COALESCE(
- SAFE_CAST(JSON_EXTRACT(additional_properties, '$.payload.sendFailure.undefined') AS INT64),
- 0
- )
- ) AS e_undefined,
- SUM(
- COALESCE(
- SAFE_CAST(JSON_EXTRACT(additional_properties, '$.payload.sendFailure.timeout') AS INT64),
- 0
- )
- ) AS e_timeout,
- SUM(
- COALESCE(
- SAFE_CAST(JSON_EXTRACT(additional_properties, '$.payload.sendFailure.abort') AS INT64),
- 0
- )
- ) AS e_abort,
- SUM(
- COALESCE(
- SAFE_CAST(
- JSON_EXTRACT(additional_properties, '$.payload.sendFailure.eUnreachable') AS INT64
- ),
- 0
- )
- ) AS e_unreachable,
- SUM(
- COALESCE(
- SAFE_CAST(
- JSON_EXTRACT(additional_properties, '$.payload.sendFailure.eTerminated') AS INT64
- ),
- 0
- )
- ) AS e_terminated,
- SUM(
- COALESCE(
- SAFE_CAST(
- JSON_EXTRACT(additional_properties, '$.payload.sendFailure.eChannelOpen') AS INT64
- ),
- 0
- )
- ) AS e_channel_open,
- FROM
- `moz-fx-data-shared-prod.telemetry.health`
- WHERE
- DATE(submission_timestamp) = @submission_date
- GROUP BY
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- `datetime`,
- client_id
- HAVING
- country <> "??"
-),
-health_data_aggregates AS (
- SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- `datetime`,
- COUNTIF(e_undefined > 0) AS num_clients_e_undefined,
- COUNTIF(e_timeout > 0) AS num_clients_e_timeout,
- COUNTIF(e_abort > 0) AS num_clients_e_abort,
- COUNTIF(e_unreachable > 0) AS num_clients_e_unreachable,
- COUNTIF(e_terminated > 0) AS num_clients_e_terminated,
- COUNTIF(e_channel_open > 0) AS num_clients_e_channel_open,
- FROM
- health_data_sample
- GROUP BY
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- `datetime`
- HAVING
- COUNT(*) > 50
-),
-final_health_data AS (
- SELECT
- h.country,
- h.city,
- h.geo_subdivision1,
- h.geo_subdivision2,
- h.datetime,
- (num_clients_e_undefined / DAUs.client_count) AS proportion_undefined,
- (num_clients_e_timeout / DAUs.client_count) AS proportion_timeout,
- (num_clients_e_abort / DAUs.client_count) AS proportion_abort,
- (num_clients_e_unreachable / DAUs.client_count) AS proportion_unreachable,
- (num_clients_e_terminated / DAUs.client_count) AS proportion_terminated,
- (num_clients_e_channel_open / DAUs.client_count) AS proportion_channel_open,
- FROM
- health_data_aggregates AS h
- INNER JOIN
- DAUs
- USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2)
-),
--- Compute aggregates for histograms coming from the health ping.
-histogram_data_sample AS (
- SELECT
- `moz-fx-data-shared-prod.udf.geo_struct_set_defaults`(
- metadata.geo.country,
- metadata.geo.city,
- metadata.geo.subdivision1, -- returned field gets renamed to geo_subdivision1
- metadata.geo.subdivision2 -- returned field gets renamed to geo_subdivision2
- ).*,
- client_id,
- document_id,
- TIMESTAMP_TRUNC(submission_timestamp, HOUR) AS time_slot,
- payload.info.subsession_length AS subsession_length,
- mozfun.hist.extract(payload.histograms.dns_failed_lookup_time).values AS dns_fail,
- mozfun.hist.extract(payload.histograms.dns_lookup_time).values AS dns_success,
- mozfun.hist.extract(payload.histograms.ssl_cert_verification_errors).values AS ssl_cert_errors,
- mozfun.hist.extract(
- payload.processes.content.histograms.http_page_tls_handshake
- ).values AS tls_handshake,
- FROM
- `moz-fx-data-shared-prod.telemetry_stable.main_v5`
- WHERE
- DATE(submission_timestamp) = @submission_date
- -- Restrict to Firefox.
- AND normalized_app_name = 'Firefox'
- -- Only to pings who seem to represent an active session.
- AND payload.info.subsession_length >= 0
- -- Country can be null if geoip lookup failed.
- -- There's no point in adding these to the analyses.
- AND metadata.geo.country IS NOT NULL
-),
--- DNS_SUCCESS histogram
-dns_success_time AS (
- SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- time_slot AS `datetime`,
- EXP(SUM(LOG(key) * count) / SUM(count)) AS value
- FROM
- (
- SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- client_id,
- time_slot,
- key,
- SUM(LEAST(2147483648, value)) AS count
- FROM
- histogram_data_sample
- CROSS JOIN
- UNNEST(histogram_data_sample.dns_success)
- GROUP BY
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- time_slot,
- client_id,
- key
- )
- WHERE
- key > 0
- GROUP BY
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- `datetime`
- HAVING
- COUNT(*) > 50
-),
--- Oddness: active sessions without DNS_LOOKUP_TIME
-dns_no_dns_lookup_time AS (
- SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- time_slot AS `datetime`,
- SUM(IF(subsession_length > 0 AND is_empty = 1, 1, 0)) / (
- 1 + SUM(IF(subsession_length > 0, 1, 0))
- ) AS value
- FROM
- (
- SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- client_id,
- time_slot,
- subsession_length,
- empty(dns_success) AS is_empty
- FROM
- histogram_data_sample
- )
- GROUP BY
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- `datetime`
- HAVING
- COUNT(*) > 50
-),
--- A shared source for the DNS_FAIL histogram
-dns_failure_src AS (
- SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- client_id,
- time_slot,
- key,
- SUM(LEAST(2147483648, value)) AS count
- FROM
- histogram_data_sample
- CROSS JOIN
- UNNEST(histogram_data_sample.dns_fail)
- GROUP BY
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- time_slot,
- client_id,
- key
-),
--- DNS_FAIL histogram
-dns_failure_time AS (
- SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- time_slot AS `datetime`,
- EXP(SUM(LOG(key) * count) / SUM(count)) AS value
- FROM
- dns_failure_src
- WHERE
- key > 0
- GROUP BY
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- `datetime`
- HAVING
- COUNT(*) > 50
-),
--- DNS_FAIL counts
-dns_failure_counts AS (
- SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- time_slot AS `datetime`,
- AVG(count) AS value
- FROM
- (
- SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- client_id,
- time_slot,
- SUM(count) AS count
- FROM
- dns_failure_src
- GROUP BY
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- time_slot,
- client_id
- )
- GROUP BY
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- `datetime`
- HAVING
- COUNT(*) > 50
-),
--- Oddness: active sessions without DNS_FAILED_LOOKUP_TIME
-dns_no_dns_failure_time AS (
- SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- time_slot AS `datetime`,
- SUM(IF(subsession_length > 0 AND is_empty = 1, 1, 0)) / (
- 1 + SUM(IF(subsession_length > 0, 1, 0))
- ) AS value
- FROM
- (
- SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- client_id,
- time_slot,
- subsession_length,
- empty(dns_fail) AS is_empty
- FROM
- histogram_data_sample
- )
- GROUP BY
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- `datetime`
- HAVING
- COUNT(*) > 50
-),
--- SSL_CERT_VERIFICATION_ERRORS histograms
-ssl_error_prop_src AS (
- SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- time_slot,
- client_id,
- document_id,
- subsession_length,
- sum_values(ssl_cert_errors) AS ssl_sum_vals
- FROM
- histogram_data_sample
-),
-ssl_error_prop AS (
- SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- time_slot AS `datetime`,
- SUM(IF(subsession_length > 0 AND ssl_sum_vals > 0, 1, 0)) / (
- 1 + SUM(IF(subsession_length > 0, 1, 0))
- ) AS value
- FROM
- ssl_error_prop_src
- GROUP BY
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- `datetime`
- HAVING
- COUNT(*) > 50
-),
--- TLS_HANDSHAKE histogram
-tls_handshake_time AS (
- SELECT
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- time_slot AS `datetime`,
- EXP(SUM(LOG(key) * count) / SUM(count)) AS value
- FROM
- (
- SELECT
- country,
- city,
- client_id,
- geo_subdivision1,
- geo_subdivision2,
- time_slot,
- key,
- SUM(LEAST(2147483648, value)) AS count
- FROM
- histogram_data_sample
- CROSS JOIN
- UNNEST(histogram_data_sample.tls_handshake)
- GROUP BY
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- time_slot,
- client_id,
- key
- )
- WHERE
- key > 0
- GROUP BY
- country,
- city,
- geo_subdivision1,
- geo_subdivision2,
- `datetime`
- HAVING
- COUNT(*) > 50
-)
-SELECT
- DAUs.country AS country,
- -- Caveat: mindmind location mapping returns `??` in cases where
- -- the population is less than 15k. As long as the entry has a higher level
- -- grouping (country) we still want to keep it. For backwards compatibility,
- -- we rename this value to `unknown` to represent a lack of value.
- IF(DAUs.city = "??", "unknown", DAUs.city) AS city,
- NULLIF(DAUs.geo_subdivision1, "??") AS geo_subdivision1,
- NULLIF(DAUs.geo_subdivision2, "??") AS geo_subdivision2,
- DAUs.datetime AS `datetime`,
- hd.* EXCEPT (`datetime`, country, city, geo_subdivision1, geo_subdivision2),
- ds.value AS avg_dns_success_time,
- ds_missing.value AS missing_dns_success,
- df.value AS avg_dns_failure_time,
- df_missing.value AS missing_dns_failure,
- dfc.value AS count_dns_failure,
- ssl.value AS ssl_error_prop,
- tls.value AS avg_tls_handshake_time
-FROM
- final_health_data AS hd
--- We apply LEFT JOIN here and in the other places instead
--- of a FULL OUTER JOIN. Since LEFT is DAUs, which should contain
--- all the countries and all the days, it should always have matches
--- with whatever we pass on the RIGHT.
--- When doing a FULL OUTER JOIN, we end up sometimes with nulls on the
--- left because there are a few samples coming from telemetry.main that
--- are not accounted for in `moz-fx-data-shared-prod.telemetry.clients_daily`
-LEFT JOIN
- DAUs
- USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2)
-LEFT JOIN
- dns_success_time AS ds
- USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2)
-LEFT JOIN
- dns_no_dns_lookup_time AS ds_missing
- USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2)
-LEFT JOIN
- dns_failure_time AS df
- USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2)
-LEFT JOIN
- dns_failure_counts AS dfc
- USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2)
-LEFT JOIN
- dns_no_dns_failure_time AS df_missing
- USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2)
-LEFT JOIN
- tls_handshake_time AS tls
- USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2)
-LEFT JOIN
- ssl_error_prop AS ssl
- USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2)
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/schema.yaml 2024-07-26 20:00:50.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/schema.yaml 1970-01-01 00:00:00.000000000 +0000
@@ -1,116 +0,0 @@
-fields:
-- mode: NULLABLE
- name: country
- type: STRING
- description: |
- The Country code of the client.
-
-- mode: NULLABLE
- name: city
- type: STRING
- description: |
- The City name (only for cities with a population >= 15000, 'unknown' otherwise).
-
-- mode: NULLABLE
- name: geo_subdivision1
- type: STRING
- description: |
- Country subdivision (1st level) corresponds, for example, to US states, Canadian provinces,
- French regions, Spanish autonomous communities, and Japanese prefectures. Geo charts do not support FIPS or NUTS codes.
-
-- mode: NULLABLE
- name: geo_subdivision2
- type: STRING
- description: |
- Country subdivision (2nd level) corresponds, for example, to US counties, French departments, and Italian and Spanish provinces.
-
-- mode: NULLABLE
- name: datetime
- type: TIMESTAMP
- description: |
- The date and the time (truncated to hour) the data was submitted by the client.
-
-- mode: NULLABLE
- name: proportion_undefined
- type: FLOAT
- description: |
- The proportion of users who failed to send telemetry for a reason that was not listed in the other cases.
-
-# yamllint disable rule:line-length
-- mode: NULLABLE
- name: proportion_timeout
- type: FLOAT
- description: |
- The proportion of users that had their connection timeout while uploading telemetry
- ([after 90s, in Firefox Desktop](https://searchfox.org/mozilla-central/rev/fa2df28a49883612bd7af4dacd80cdfedcccd2f6/toolkit/components/telemetry/app/TelemetrySend.jsm#81)).
-# yamllint enable rule:line-length
-
-- mode: NULLABLE
- name: proportion_abort
- type: FLOAT
- description: |
- The proportion of users that had their connection terminated by the client
- (for example, terminating open connections before shutting down).
-
-- mode: NULLABLE
- name: proportion_unreachable
- type: FLOAT
- description: |
- The proportion of users that failed to upload telemetry because the server was not reachable
- (e.g. because the host was not reachable, proxy problems or OS waking up after a suspension).
-
-- mode: NULLABLE
- name: proportion_terminated
- type: FLOAT
- description: |
- The proportion of users that had their connection terminated internally by the networking code.
-
-- mode: NULLABLE
- name: proportion_channel_open
- type: FLOAT
- description: |
- The proportion of users for which the upload request was terminated immediately,
- by the client,because of a Necko internal error.
-
-- mode: NULLABLE
- name: avg_dns_success_time
- type: FLOAT
- description: |
- The average time it takes for a successful DNS resolution, in milliseconds.
-
-- mode: NULLABLE
- name: missing_dns_success
- type: FLOAT
- description: |
-
- Counts how many sessions did not report the `DNS_LOOKUP_TIME` histogram.
-
-- mode: NULLABLE
- name: avg_dns_failure_time
- type: FLOAT
- description: |
- The average time it takes for an unsuccessful DNS resolution, in milliseconds.
-
-- mode: NULLABLE
- name: missing_dns_failure
- type: FLOAT
- description: |
- Counts how many sessions did not report the `DNS_FAILED_LOOKUP_TIME` histogram.
-
-- mode: NULLABLE
- name: count_dns_failure
- type: FLOAT
- description: |
- The average count of unsuccessful DNS resolutions reported.
-
-- mode: NULLABLE
- name: ssl_error_prop
- type: FLOAT
- description: |
- The proportion of users that reported an error through the `SSL_CERT_VERIFICATION_ERRORS` histogram.
-
-- mode: NULLABLE
- name: avg_tls_handshake_time
- type: FLOAT
- description: |
- The average time after the TCP SYN to ready for HTTP, in milliseconds.
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/klar_android/use_counters/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/klar_android/use_counters/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/klar_android/use_counters/schema.yaml 2024-07-26 20:01:24.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/klar_android/use_counters/schema.yaml 2024-07-26 20:17:39.000000000 +0000
@@ -1,11 +1,11 @@
fields:
- name: normalized_app_id
- mode: NULLABLE
type: STRING
+ mode: NULLABLE
description: App ID of the channel data was received from
- name: normalized_channel
- mode: NULLABLE
type: STRING
+ mode: NULLABLE
description: Normalized channel name
- name: additional_properties
type: STRING
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/event_monitoring_live_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/event_monitoring_live_v1/metadata.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/event_monitoring_live_v1/metadata.yaml 2024-07-26 20:01:57.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/event_monitoring_live_v1/metadata.yaml 2024-07-26 20:20:54.000000000 +0000
@@ -1,10 +1,19 @@
+friendly_name: Event Monitoring Live
+description: |-
+ Materialized view of experimentation related events
+ coming from monitor_frontend.
+owners:
+- ascholtz@mozilla.com
+- akomar@mozilla.com
+labels:
+ materialized_view: true
+ owner1: ascholtz
+ owner2: akomar
+bigquery: null
workgroup_access:
- role: roles/bigquery.dataViewer
members:
- workgroup:mozilla-confidential
- - workgroup:dataops-managed/external-fides
-
-# Generated by bigquery_etl.dependency
references:
materialized_view.sql:
- moz-fx-data-shared-prod.monitor_frontend_live.events_v1
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/monitor_dashboard_user_journey_funnels_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/monitor_dashboard_user_journey_funnels_v1/metadata.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/monitor_dashboard_user_journey_funnels_v1/metadata.yaml 2024-07-26 20:01:57.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/monitor_dashboard_user_journey_funnels_v1/metadata.yaml 2024-07-26 20:20:54.000000000 +0000
@@ -1,10 +1,26 @@
+friendly_name: Monitor Dashboard User Journey Funnels
+description: |-
+ Please provide a description for the query
+owners:
+- ksiegler@mozilla.org
+labels:
+ incremental: true
+ dag: bqetl_generated_funnels
+ owner1: ksiegler
+scheduling:
+ dag_name: bqetl_generated_funnels
+bigquery:
+ time_partitioning:
+ type: day
+ field: submission_date
+ require_partition_filter: false
+ expiration_days: null
+ range_partitioning: null
+ clustering: null
workgroup_access:
- role: roles/bigquery.dataViewer
members:
- workgroup:mozilla-confidential
- - workgroup:dataops-managed/external-fides
-
-# Generated by bigquery_etl.dependency
references:
query.sql:
- mozdata.monitor_frontend.events_unnested
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/org_mozilla_fenix/geckoview_version/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/org_mozilla_fenix/geckoview_version/schema.yaml
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/org_mozilla_fenix/geckoview_version/schema.yaml 2024-07-26 20:00:49.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/org_mozilla_fenix/geckoview_version/schema.yaml 2024-07-26 20:10:57.000000000 +0000
@@ -1,7 +1,13 @@
fields:
-- type: DATETIME
- name: build_hour
-- type: INTEGER
- name: geckoview_major_version
-- type: INTEGER
- name: n_pings
+- name: build_hour
+ type: DATETIME
+ mode: NULLABLE
+ description: null
+- name: geckoview_major_version
+ type: INTEGER
+ mode: NULLABLE
+ description: null
+- name: n_pings
+ type: INTEGER
+ mode: NULLABLE
+ description: null
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_derived/mobile_search_clients_daily_v2/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/search_derived/mobile_search_cli
```
Fixes https://github.com/mozilla/bigquery-etl/issues/5974
This ensures that UDFs pushed to the stage project also get tested in CI. I tested this in an example UDF that was supposed to fail: https://circleci.com/gh/mozilla/bigquery-etl/422962
Checklist for reviewer:
<username>:<branch>
of the fork as parameter. The parameter will also show up in the logs of themanual-trigger-required-for-fork
CI task together with more detailed instructions.For modifications to schemas in restricted namespaces (see
CODEOWNERS
):┆Issue is synchronized with this Jira Task