mozilla / bigquery-etl

Bigquery ETL
https://mozilla.github.io/bigquery-etl
Mozilla Public License 2.0
253 stars 100 forks source link

Fix skipped routines tests #5976

Closed scholtzan closed 2 months ago

scholtzan commented 2 months ago

Fixes https://github.com/mozilla/bigquery-etl/issues/5974

This ensures that UDFs pushed to the stage project also get tested in CI. I tested this in an example UDF that was supposed to fail: https://circleci.com/gh/mozilla/bigquery-etl/422962

Checklist for reviewer:

For modifications to schemas in restricted namespaces (see CODEOWNERS):

┆Issue is synchronized with this Jira Task

dataops-ci-bot commented 2 months ago

Integration report for "Add broken UDF for testing"

sql.diff

Click to expand! ```diff Only in /tmp/workspace/generated-sql/sql/mozfun/addons: is_adblocker_broken diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/metadata.yaml /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/metadata.yaml 2024-07-25 21:10:45.000000000 +0000 @@ -0,0 +1,2 @@ +description: Returns whether a given Addon ID is an adblocker. +friendly_name: Is Adblocker diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/README.md /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/README.md --- /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/README.md 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/README.md 2024-07-25 21:10:45.000000000 +0000 @@ -0,0 +1,16 @@ +Determine if a given Addon ID is for an adblocker. + +As an example, this query will give the number of users who +have an adblocker installed. +``` +SELECT + submission_date, + COUNT(DISTINCT client_id) AS dau, +FROM + mozdata.telemetry.addons +WHERE + mozfun.addons.is_adblocker(addon_id) + AND submission_date >= "2023-01-01" +GROUP BY + submission_date +``` diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/udf.sql /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/udf.sql --- /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/udf.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/udf.sql 2024-07-25 21:10:45.000000000 +0000 @@ -0,0 +1,19 @@ +CREATE OR REPLACE FUNCTION addons.is_adblocker(addon_id STRING) +RETURNS BOOLEAN AS ( + addon_id IS NOT NULL + AND addon_id IN ( + 'uBlock0@raymondhill.net', -- 'uBlock_Origin' + '{d10d0bf8-f5b5-c8b4-a8b2-2b9879e08c5d}', -- 'AdBlock_Plus' + 'jid1-NIfFY2CA8fy1tg@jetpack', -- 'Adblock' + 'firefox@ghostery.com', -- 'Ghostery' + 'adblockultimate@adblockultimate.net' -- 'AdBlock_Ultimate' + ) +); + +-- Tests +SELECT + assert.true(addons.is_adblocker('uBlock0@raymondhill.net')), + assert.true(addons.is_adblocker('adblockultimate@adblockultimate.net')), + assert.false(addons.is_adblocker('failing test')), + assert.false(addons.is_adblocker('')), + assert.false(addons.is_adblocker(NULL)), ```

Link to full diff

dataops-ci-bot commented 2 months ago

Integration report for "Add broken UDF for testing"

sql.diff

Click to expand! ```diff diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_generated_funnels.py /tmp/workspace/generated-sql/dags/bqetl_generated_funnels.py --- /tmp/workspace/main-generated-sql/dags/bqetl_generated_funnels.py 2024-07-26 19:28:20.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_generated_funnels.py 2024-07-26 19:46:00.000000000 +0000 @@ -276,6 +276,21 @@ ) ) + monitor_frontend_derived__monitor_dashboard_user_journey_funnels__v1 = bigquery_etl_query( + task_id="monitor_frontend_derived__monitor_dashboard_user_journey_funnels__v1", + destination_table="monitor_dashboard_user_journey_funnels_v1", + dataset_id="monitor_frontend_derived", + project_id="moz-fx-data-shared-prod", + owner="ksiegler@mozilla.org", + email=[ + "ascholtz@mozilla.com", + "ksiegler@mozilla.org", + "telemetry-alerts@mozilla.com", + ], + date_partition_parameter="submission_date", + depends_on_past=False, + ) + accounts_frontend_derived__email_first_reg_login_funnels__v1.set_upstream( wait_for_copy_deduplicate_all ) @@ -325,3 +340,7 @@ firefox_accounts_derived__registration_funnels_legacy_events__v1.set_upstream( wait_for_firefox_accounts_derived__fxa_stdout_events__v1 ) + + monitor_frontend_derived__monitor_dashboard_user_journey_funnels__v1.set_upstream( + wait_for_copy_deduplicate_all + ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_internet_outages.py /tmp/workspace/generated-sql/dags/bqetl_internet_outages.py --- /tmp/workspace/main-generated-sql/dags/bqetl_internet_outages.py 2024-07-26 19:28:20.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_internet_outages.py 2024-07-26 19:45:57.000000000 +0000 @@ -88,12 +88,12 @@ pool="DATA_ENG_EXTERNALTASKSENSOR", ) - checks__warn_internet_outages__global_outages__v1 = bigquery_dq_check( - task_id="checks__warn_internet_outages__global_outages__v1", + checks__fail_internet_outages__global_outages__v1 = bigquery_dq_check( + task_id="checks__fail_internet_outages__global_outages__v1", source_table="global_outages_v1", dataset_id="internet_outages", project_id="moz-fx-data-shared-prod", - is_dq_check_fail=False, + is_dq_check_fail=True, owner="aplacitelli@mozilla.com", email=["aplacitelli@mozilla.com"], depends_on_past=False, @@ -101,9 +101,9 @@ retries=0, ) - checks__warn_internet_outages__global_outages__v2 = bigquery_dq_check( - task_id="checks__warn_internet_outages__global_outages__v2", - source_table="global_outages_v2", + checks__warn_internet_outages__global_outages__v1 = bigquery_dq_check( + task_id="checks__warn_internet_outages__global_outages__v1", + source_table="global_outages_v1", dataset_id="internet_outages", project_id="moz-fx-data-shared-prod", is_dq_check_fail=False, @@ -125,25 +125,14 @@ depends_on_past=False, ) - internet_outages__global_outages__v2 = bigquery_etl_query( - task_id="internet_outages__global_outages__v2", - destination_table="global_outages_v2", - dataset_id="internet_outages", - project_id="moz-fx-data-shared-prod", - owner="aplacitelli@mozilla.com", - email=["aplacitelli@mozilla.com"], - date_partition_parameter="submission_date", - depends_on_past=False, + checks__fail_internet_outages__global_outages__v1.set_upstream( + internet_outages__global_outages__v1 ) checks__warn_internet_outages__global_outages__v1.set_upstream( internet_outages__global_outages__v1 ) - checks__warn_internet_outages__global_outages__v2.set_upstream( - internet_outages__global_outages__v2 - ) - internet_outages__global_outages__v1.set_upstream(wait_for_copy_deduplicate_all) internet_outages__global_outages__v1.set_upstream( @@ -153,13 +142,3 @@ internet_outages__global_outages__v1.set_upstream( wait_for_telemetry_derived__clients_daily_joined__v1 ) - - internet_outages__global_outages__v2.set_upstream(wait_for_copy_deduplicate_all) - - internet_outages__global_outages__v2.set_upstream( - wait_for_copy_deduplicate_main_ping - ) - - internet_outages__global_outages__v2.set_upstream( - wait_for_telemetry_derived__clients_daily_joined__v1 - ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_mobile_search.py /tmp/workspace/generated-sql/dags/bqetl_mobile_search.py --- /tmp/workspace/main-generated-sql/dags/bqetl_mobile_search.py 2024-07-26 19:28:20.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_mobile_search.py 2024-07-26 19:45:57.000000000 +0000 @@ -158,23 +158,6 @@ search_derived__mobile_search_clients_daily__v1 ) - search_derived__mobile_search_clients_daily__v2 = bigquery_etl_query( - task_id="search_derived__mobile_search_clients_daily__v2", - destination_table="mobile_search_clients_daily_v2", - dataset_id="search_derived", - project_id="moz-fx-data-shared-prod", - owner="cmorales@mozilla.com", - email=[ - "akomar@mozilla.com", - "akommasani@mozilla.com", - "anicholson@mozilla.com", - "cmorales@mozilla.com", - "telemetry-alerts@mozilla.com", - ], - date_partition_parameter="submission_date", - depends_on_past=False, - ) - search_derived__mobile_search_clients_last_seen__v1 = bigquery_etl_query( task_id="search_derived__mobile_search_clients_last_seen__v1", destination_table="mobile_search_clients_last_seen_v1", @@ -199,10 +182,6 @@ wait_for_copy_deduplicate_all ) - search_derived__mobile_search_clients_daily__v2.set_upstream( - wait_for_copy_deduplicate_all - ) - search_derived__mobile_search_clients_last_seen__v1.set_upstream( search_derived__mobile_search_clients_daily__v1 ) Only in /tmp/workspace/generated-sql/sql/mozfun/addons: is_adblocker_broken Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages: global_outages Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages: global_outages_v2 Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_derived: mobile_search_clients_daily_v2 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/metadata.yaml /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/metadata.yaml 2024-07-26 19:22:57.000000000 +0000 @@ -0,0 +1,2 @@ +description: Returns whether a given Addon ID is an adblocker. +friendly_name: Is Adblocker diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/README.md /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/README.md --- /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/README.md 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/README.md 2024-07-26 19:22:57.000000000 +0000 @@ -0,0 +1,16 @@ +Determine if a given Addon ID is for an adblocker. + +As an example, this query will give the number of users who +have an adblocker installed. +``` +SELECT + submission_date, + COUNT(DISTINCT client_id) AS dau, +FROM + mozdata.telemetry.addons +WHERE + mozfun.addons.is_adblocker(addon_id) + AND submission_date >= "2023-01-01" +GROUP BY + submission_date +``` diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/udf.sql /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/udf.sql --- /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/udf.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/udf.sql 2024-07-26 19:22:57.000000000 +0000 @@ -0,0 +1,19 @@ +CREATE OR REPLACE FUNCTION addons.is_adblocker_broken(addon_id STRING) +RETURNS BOOLEAN AS ( + addon_id IS NOT NULL + AND addon_id IN ( + 'uBlock0@raymondhill.net', -- 'uBlock_Origin' + '{d10d0bf8-f5b5-c8b4-a8b2-2b9879e08c5d}', -- 'AdBlock_Plus' + 'jid1-NIfFY2CA8fy1tg@jetpack', -- 'Adblock' + 'firefox@ghostery.com', -- 'Ghostery' + 'adblockultimate@adblockultimate.net' -- 'AdBlock_Ultimate' + ) +); + +-- Tests +SELECT + assert.true(addons.is_adblocker_broken('uBlock0@raymondhill.net')), + assert.true(addons.is_adblocker_broken('adblockultimate@adblockultimate.net')), + assert.false(addons.is_adblocker_broken('failing test')), + assert.false(addons.is_adblocker_broken('')), + assert.false(addons.is_adblocker_broken(NULL)), diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml 2024-07-26 19:22:07.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml 2024-07-26 19:33:47.000000000 +0000 @@ -1,49 +1,49 @@ fields: -- mode: NULLABLE - name: submission_date +- name: submission_date type: DATE -- mode: NULLABLE - name: source + mode: NULLABLE +- name: source type: STRING -- mode: NULLABLE - name: event_type + mode: NULLABLE +- name: event_type type: STRING -- mode: NULLABLE - name: form_factor + mode: NULLABLE +- name: form_factor type: STRING -- mode: NULLABLE - name: country + mode: NULLABLE +- name: country type: STRING -- mode: NULLABLE - name: subdivision1 + mode: NULLABLE +- name: subdivision1 type: STRING -- mode: NULLABLE - name: advertiser + mode: NULLABLE +- name: advertiser type: STRING -- mode: NULLABLE - name: release_channel + mode: NULLABLE +- name: release_channel type: STRING -- mode: NULLABLE - name: position + mode: NULLABLE +- name: position type: INTEGER -- mode: NULLABLE - name: provider + mode: NULLABLE +- name: provider type: STRING -- mode: NULLABLE - name: match_type + mode: NULLABLE +- name: match_type type: STRING -- mode: NULLABLE - name: normalized_os + mode: NULLABLE +- name: normalized_os type: STRING -- mode: NULLABLE - name: suggest_data_sharing_enabled + mode: NULLABLE +- name: suggest_data_sharing_enabled type: BOOLEAN -- mode: NULLABLE - name: event_count + mode: NULLABLE +- name: event_count type: INTEGER -- mode: NULLABLE - name: user_count + mode: NULLABLE +- name: user_count type: INTEGER -- mode: NULLABLE - name: query_type + mode: NULLABLE +- name: query_type type: STRING + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml 2024-07-26 19:22:07.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml 2024-07-26 19:33:47.000000000 +0000 @@ -1,40 +1,40 @@ fields: -- mode: NULLABLE - name: submission_date +- name: submission_date type: DATE -- mode: NULLABLE - name: form_factor + mode: NULLABLE +- name: form_factor type: STRING -- mode: NULLABLE - name: country + mode: NULLABLE +- name: country type: STRING -- mode: NULLABLE - name: advertiser + mode: NULLABLE +- name: advertiser type: STRING -- mode: NULLABLE - name: normalized_os + mode: NULLABLE +- name: normalized_os type: STRING -- mode: NULLABLE - name: release_channel + mode: NULLABLE +- name: release_channel type: STRING -- mode: NULLABLE - name: position + mode: NULLABLE +- name: position type: INTEGER -- mode: NULLABLE - name: provider + mode: NULLABLE +- name: provider type: STRING -- mode: NULLABLE - name: match_type + mode: NULLABLE +- name: match_type type: STRING -- mode: NULLABLE - name: suggest_data_sharing_enabled + mode: NULLABLE +- name: suggest_data_sharing_enabled type: BOOLEAN -- mode: NULLABLE - name: impression_count + mode: NULLABLE +- name: impression_count type: INTEGER -- mode: NULLABLE - name: click_count + mode: NULLABLE +- name: click_count type: INTEGER -- mode: NULLABLE - name: query_type + mode: NULLABLE +- name: query_type type: STRING + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml 2024-07-26 19:22:07.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml 2024-07-26 19:32:35.000000000 +0000 @@ -26,6 +26,9 @@ - name: adjust_network type: STRING mode: NULLABLE +- name: install_source + type: STRING + mode: NULLABLE - name: retained_week_2 type: BOOLEAN mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml 2024-07-26 19:22:07.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml 2024-07-26 19:32:36.000000000 +0000 @@ -48,6 +48,10 @@ description: 'The type of source of a client installation. ' +- name: install_source + type: STRING + mode: NULLABLE + description: null - name: new_profiles type: INTEGER mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/use_counters/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/use_counters/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/use_counters/schema.yaml 2024-07-26 19:22:45.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/use_counters/schema.yaml 2024-07-26 19:33:28.000000000 +0000 @@ -1,11 +1,11 @@ fields: - name: normalized_app_id - mode: NULLABLE type: STRING + mode: NULLABLE description: App ID of the channel data was received from - name: normalized_channel - mode: NULLABLE type: STRING + mode: NULLABLE description: Normalized channel name - name: additional_properties type: STRING diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml 2024-07-26 19:22:07.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml 2024-07-26 19:31:31.000000000 +0000 @@ -6,9 +6,10 @@ - name: site_url type: STRING mode: NULLABLE - description: |- - For domain properties, this will be `sc-domain:` followed by the domain name. - For URL-prefix properties, it will be the full URL of the property definition. + description: 'For domain properties, this will be `sc-domain:` followed by the domain + name. + + For URL-prefix properties, it will be the full URL of the property definition.' - name: site_domain_name type: STRING mode: NULLABLE @@ -16,103 +17,110 @@ - name: page_url type: STRING mode: NULLABLE - description: |- - The final page URL linked by a search result after any skip redirects. - This will be null for anonymized Discover impressions. + description: 'The final page URL linked by a search result after any skip redirects. + + This will be null for anonymized Discover impressions.' - name: page_domain_name type: STRING mode: NULLABLE - description: |- - Domain name of the page URL. - This will be null for anonymized Discover impressions. + description: 'Domain name of the page URL. + + This will be null for anonymized Discover impressions.' - name: page_path type: STRING mode: NULLABLE - description: |- - The path part of the page URL. - This will be null for anonymized Discover impressions. + description: 'The path part of the page URL. + + This will be null for anonymized Discover impressions.' - name: localized_site_code type: STRING mode: NULLABLE - description: |- - Localized site code such as `en-US` or `de` found in the first segment of the page URL path (if any). - This will be null for anonymized Discover impressions. + description: 'Localized site code such as `en-US` or `de` found in the first segment + of the page URL path (if any). + + This will be null for anonymized Discover impressions.' - name: localized_site type: STRING mode: NULLABLE - description: |- - Description of the localized site language and/or country based on `localized_site_code` (if any). - This will be null for anonymized Discover impressions. + description: 'Description of the localized site language and/or country based on + `localized_site_code` (if any). + + This will be null for anonymized Discover impressions.' - name: localized_site_language_code type: STRING mode: NULLABLE - description: |- - Localized site language code in ISO-639-alpha-2 format found in the first segment of the page URL path (if any). - This will be null for anonymized Discover impressions. + description: 'Localized site language code in ISO-639-alpha-2 format found in the + first segment of the page URL path (if any). + + This will be null for anonymized Discover impressions.' - name: localized_site_language type: STRING mode: NULLABLE - description: |- - Localized site language based on `localized_site_language_code` (if any). - This will be null for anonymized Discover impressions. + description: 'Localized site language based on `localized_site_language_code` (if + any). + + This will be null for anonymized Discover impressions.' - name: query type: STRING mode: NULLABLE - description: |- - The search query. - This will be null for anonymized search impressions, and all Discover and Google News search impressions. + description: 'The search query. + + This will be null for anonymized search impressions, and all Discover and Google + News search impressions.' - name: query_type type: STRING mode: NULLABLE - description: |- - Type of search query: - * Anonymized: Query was redacted by Google to protect the users' privacy. - * Brand: Query contained one or more Mozilla brand keywords. - * Non-Brand: Query didn't contain any Mozilla brand keywords. - * Unknown: Query couldn't be classified. - This will be null for all Discover and Google News search impressions. + description: "Type of search query:\n * Anonymized: Query was redacted by Google\ + \ to protect the users' privacy.\n * Brand: Query contained one or more Mozilla\ + \ brand keywords.\n * Non-Brand: Query didn't contain any Mozilla brand keywords.\n\ + \ * Unknown: Query couldn't be classified.\nThis will be null for all Discover\ + \ and Google News search impressions." - name: is_anonymized type: BOOLEAN mode: NULLABLE - description: |- - Whether Google has anonymized the search impression to protect the users' privacy. + description: 'Whether Google has anonymized the search impression to protect the + users'' privacy. + The `query` field will be null for anonymized search impressions. - The `country_code`, `page_url`, and related fields will be null for anonymized Discover impressions. + + The `country_code`, `page_url`, and related fields will be null for anonymized + Discover impressions.' - name: has_good_page_experience type: BOOLEAN mode: NULLABLE - description: |- - Whether Google Search considers the page to be providing a good page experience. - This will be null when the source data wasn't exported directly to BigQuery by Google. + description: 'Whether Google Search considers the page to be providing a good page + experience. + + This will be null when the source data wasn''t exported directly to BigQuery by + Google.' - name: search_type type: STRING mode: NULLABLE - description: |- - Where the link was seen by the user: - * Web: In Google Search's default "All" tab. - * Image: In Google Search's "Images" tab. - * Video: In Google Search's "Videos" tab. - * News: In Google Search's "News" tab. - * Discover: In Google's Discover feed. - * Google News: On news.google.com or in the Google News app on Android and iOS. + description: "Where the link was seen by the user:\n * Web: In Google Search's\ + \ default \"All\" tab.\n * Image: In Google Search's \"Images\" tab.\n * Video:\ + \ In Google Search's \"Videos\" tab.\n * News: In Google Search's \"News\" tab.\n\ + \ * Discover: In Google's Discover feed.\n * Google News: On news.google.com\ + \ or in the Google News app on Android and iOS." - name: search_appearance type: STRING mode: NULLABLE - description: |- - How the search result appeared (e.g. normal result, translated result, video). - This will be null when the source data wasn't exported directly to BigQuery by Google. + description: 'How the search result appeared (e.g. normal result, translated result, + video). + + This will be null when the source data wasn''t exported directly to BigQuery by + Google.' - name: user_country_code type: STRING mode: NULLABLE - description: |- - Country from which the user was searching, in ISO-3166-1-alpha-3 format. - This will be null for anonymized Discover impressions. + description: 'Country from which the user was searching, in ISO-3166-1-alpha-3 format. + + This will be null for anonymized Discover impressions.' - name: user_country type: STRING mode: NULLABLE - description: |- - Country from which the user was searching. - This will be null for anonymized Discover impressions. + description: 'Country from which the user was searching. + + This will be null for anonymized Discover impressions.' - name: user_region type: STRING mode: NULLABLE @@ -124,13 +132,15 @@ - name: device_type type: STRING mode: NULLABLE - description: |- - The type of device on which the user was searching: Desktop, Mobile, or Tablet. - This will be null for Discover impressions. + description: 'The type of device on which the user was searching: Desktop, Mobile, + or Tablet. + + This will be null for Discover impressions.' - name: impressions type: INTEGER mode: NULLABLE - description: The number of times that search results with a link to the page were shown to a user. + description: The number of times that search results with a link to the page were + shown to a user. - name: clicks type: INTEGER mode: NULLABLE @@ -138,6 +148,7 @@ - name: average_position type: FLOAT mode: NULLABLE - description: |- - The average position of the page in the search results, where `1` is the topmost position. - This will be null for Discover and Google News search impressions. + description: 'The average position of the page in the search results, where `1` + is the topmost position. + + This will be null for Discover and Google News search impressions.' diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/schema.yaml 2024-07-26 19:22:07.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/schema.yaml 2024-07-26 19:31:31.000000000 +0000 @@ -6,9 +6,10 @@ - name: site_url type: STRING mode: NULLABLE - description: |- - For domain properties, this will be `sc-domain:` followed by the domain name. - For URL-prefix properties, it will be the full URL of the property definition. + description: 'For domain properties, this will be `sc-domain:` followed by the domain + name. + + For URL-prefix properties, it will be the full URL of the property definition.' - name: site_domain_name type: STRING mode: NULLABLE @@ -20,27 +21,23 @@ - name: query_type type: STRING mode: NULLABLE - description: |- - Type of search query: - * Anonymized: Query was redacted by Google to protect the users' privacy. - * Brand: Query contained one or more Mozilla brand keywords. - * Non-Brand: Query didn't contain any Mozilla brand keywords. - * Unknown: Query couldn't be classified. + description: "Type of search query:\n * Anonymized: Query was redacted by Google\ + \ to protect the users' privacy.\n * Brand: Query contained one or more Mozilla\ + \ brand keywords.\n * Non-Brand: Query didn't contain any Mozilla brand keywords.\n\ + \ * Unknown: Query couldn't be classified." - name: is_anonymized type: BOOLEAN mode: NULLABLE - description: |- - Whether Google has anonymized the search impression to protect the users' privacy. - The `query` field will be null for anonymized search impressions. + description: 'Whether Google has anonymized the search impression to protect the + users'' privacy. + + The `query` field will be null for anonymized search impressions.' - name: search_type type: STRING mode: NULLABLE - description: |- - Where the link was seen by the user: - * Web: In Google Search's default "All" tab. - * Image: In Google Search's "Images" tab. - * Video: In Google Search's "Videos" tab. - * News: In Google Search's "News" tab. + description: "Where the link was seen by the user:\n * Web: In Google Search's\ + \ default \"All\" tab.\n * Image: In Google Search's \"Images\" tab.\n * Video:\ + \ In Google Search's \"Videos\" tab.\n * News: In Google Search's \"News\" tab." - name: user_country_code type: STRING mode: NULLABLE @@ -60,17 +57,20 @@ - name: device_type type: STRING mode: NULLABLE - description: |- - The type of device on which the user was searching: Desktop, Mobile, or Tablet. + description: 'The type of device on which the user was searching: Desktop, Mobile, + or Tablet.' - name: impressions type: INTEGER mode: NULLABLE - description: The number of times that search results with at least one link to the site were shown to a user. + description: The number of times that search results with at least one link to the + site were shown to a user. - name: clicks type: INTEGER mode: NULLABLE - description: The number of times a user clicked at least one search result link to the site. + description: The number of times a user clicked at least one search result link + to the site. - name: average_top_position type: FLOAT mode: NULLABLE - description: The average top position of the site in the search results, where `1` is the topmost position. + description: The average top position of the site in the search results, where `1` + is the topmost position. diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/metadata.yaml 2024-07-26 19:22:45.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 @@ -1,15 +0,0 @@ -friendly_name: Global Outages -description: |- - Please provide a description for the query -owners: [] -labels: {} -bigquery: null -workgroup_access: -- role: roles/bigquery.dataViewer - members: - - workgroup:mozilla-confidential - - workgroup:internet-outages/external -references: - view.sql: - - moz-fx-data-shared-prod.internet_outages.global_outages_v1 - - moz-fx-data-shared-prod.internet_outages.global_outages_v2 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/schema.yaml 2024-07-26 19:22:45.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/schema.yaml 1970-01-01 00:00:00.000000000 +0000 @@ -1,58 +0,0 @@ -fields: -- name: country - type: STRING - mode: NULLABLE -- name: city - type: STRING - mode: NULLABLE -- name: geo_subdivision1 - type: STRING - mode: NULLABLE -- name: geo_subdivision2 - type: STRING - mode: NULLABLE -- name: datetime - type: TIMESTAMP - mode: NULLABLE -- name: proportion_undefined - type: FLOAT - mode: NULLABLE -- name: proportion_timeout - type: FLOAT - mode: NULLABLE -- name: proportion_abort - type: FLOAT - mode: NULLABLE -- name: proportion_unreachable - type: FLOAT - mode: NULLABLE -- name: proportion_terminated - type: FLOAT - mode: NULLABLE -- name: proportion_channel_open - type: FLOAT - mode: NULLABLE -- name: avg_dns_success_time - type: FLOAT - mode: NULLABLE -- name: missing_dns_success - type: FLOAT - mode: NULLABLE -- name: avg_dns_failure_time - type: FLOAT - mode: NULLABLE -- name: missing_dns_failure - type: FLOAT - mode: NULLABLE -- name: count_dns_failure - type: FLOAT - mode: NULLABLE -- name: ssl_error_prop - type: FLOAT - mode: NULLABLE -- name: avg_tls_handshake_time - type: FLOAT - mode: NULLABLE -- name: dataset_version - type: STRING - mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/view.sql 2024-07-26 19:22:08.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/view.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1,52 +0,0 @@ -CREATE OR REPLACE VIEW - `moz-fx-data-shared-prod.internet_outages.global_outages` -AS -SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - `datetime`, - proportion_undefined, - proportion_timeout, - proportion_abort, - proportion_unreachable, - proportion_terminated, - proportion_channel_open, - avg_dns_success_time, - missing_dns_success, - avg_dns_failure_time, - missing_dns_failure, - count_dns_failure, - ssl_error_prop, - avg_tls_handshake_time, - "v2" AS dataset_version, -FROM - `moz-fx-data-shared-prod.internet_outages.global_outages_v2` -WHERE - DATE(`datetime`) >= "2024-07-26" -UNION ALL -SELECT - country, - city, - CAST(NULL AS STRING) AS geo_subdivision1, - CAST(NULL AS STRING) AS geo_subdivision2, - `datetime`, - proportion_undefined, - proportion_timeout, - proportion_abort, - proportion_unreachable, - proportion_terminated, - proportion_channel_open, - avg_dns_success_time, - missing_dns_success, - avg_dns_failure_time, - missing_dns_failure, - count_dns_failure, - ssl_error_prop, - avg_tls_handshake_time, - "v1" AS dataset_version, -FROM - `moz-fx-data-shared-prod.internet_outages.global_outages_v1` -WHERE - DATE(`datetime`) < "2024-07-26" diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/checks.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/checks.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/checks.sql 2024-07-26 19:22:08.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/checks.sql 2024-07-26 19:22:57.000000000 +0000 @@ -1,10 +1,10 @@ -#warn +#fail {{ min_row_count(1000, where="DATE(datetime) = @submission_date") }} -#warn +#fail {{ is_unique(columns=["datetime", "city", "country"], where="DATE(`datetime`) = @submission_date") }} -#warn +#fail /* This statement used to contain the following fields, but these are sometimes missing from country/city combinations diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/metadata.yaml 2024-07-26 19:23:59.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/metadata.yaml 2024-07-26 19:41:12.000000000 +0000 @@ -1,4 +1,4 @@ -friendly_name: Internet Outages (Version 1) +friendly_name: Internet Outages description: |- This contains a set aggregated metrics that correlate to internet outages for different countries in the world. diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/checks.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/checks.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/checks.sql 2024-07-26 19:22:08.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/checks.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1,38 +0,0 @@ -{% set _WHERE = '@submission_date = @submission_date' %} - -#warn -{{ min_row_count(1000, where=_WHERE) }} - -#warn -{{ is_unique(columns=["country", "city", "geo_subdivision1", "geo_subdivision2", "datetime"], where=_WHERE) }} - -#warn -/* - This statement used to contain the following fields, - but these are sometimes missing from country/city combinations - See https://sql.telemetry.mozilla.org/queries/96541/source - and bug 1868674 - - "avg_tls_handshake_time" - "count_dns_failure" -*/ -{{ not_null(columns=[ - "country", - "city", - "datetime", - "proportion_undefined", - "proportion_timeout", - "proportion_abort", - "proportion_unreachable", - "proportion_terminated", - "proportion_channel_open", - "avg_dns_success_time", - "missing_dns_success", - "avg_dns_failure_time", - "missing_dns_failure", - "ssl_error_prop", - -], where=_WHERE) }} - -#warn -{{ value_length(column="country", expected_length=2, where=_WHERE) }} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/metadata.yaml 2024-07-26 19:23:59.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 @@ -1,98 +0,0 @@ -friendly_name: Internet Outages (version 2) -description: |- - Note: Version 2 contains two additional fields: geo_subdivision1 && geo_subdivision2 for more accurate data grouping - and the data available starting 2024/07/26. - - This contains a set aggregated metrics that correlate to internet - outages for different countries in the world. - - The dataset contains the following fields: - - `country`: - the Country code of the client. - - `city`: - the City name (only for cities with a population >= 15000, 'unknown' otherwise). - - `geo_subdivision1`: - Country subdivision (1st level) corresponds, for example, to US states, Canadian provinces, - French regions, Spanish autonomous communities, and Japanese prefectures. Geo charts do not support FIPS or NUTS codes. - - `geo_subdivision2`: - Country subdivision (2nd level) corresponds, for example, to US counties, French departments, and Italian and Spanish provinces. - - `datetime`: - the date and the time (truncated to hour) the data was submitted by the client. - - `proportion_undefined`: - the proportion of users who failed to send telemetry for a reason that was not listed in the other cases. - - `proportion_timeout`: - the proportion of users that had their connection timeout while uploading telemetry - ([after 90s, in Firefox Desktop](https://searchfox.org/mozilla-central/rev/fa2df28a49883612bd7af4dacd80cdfedcccd2f6/toolkit/components/telemetry/app/TelemetrySend.jsm#81)). - - `proportion_abort`: - the proportion of users that had their connection terminated by the client (for example, terminating open connections before shutting down). - - `proportion_unreachable`: - the proportion of users that failed to upload telemetry because the server was not reachable - (e.g. because the host was not reachable, proxy problems or OS waking up after a suspension). - - `proportion_terminated`: - the proportion of users that had their connection terminated internally by the networking code. - - `proportion_channel_open`: - the proportion of users for which the upload request was terminated immediately, by the client, because of a Necko internal error. - - `avg_dns_success_time`: - the average time it takes for a successful DNS resolution, in milliseconds. - - `missing_dns_success`: - counts how many sessions did not report the `DNS_LOOKUP_TIME` histogram. - - `avg_dns_failure_time`: - the average time it takes for an unsuccessful DNS resolution, in milliseconds. - - `missing_dns_failure`: - counts how many sessions did not report the `DNS_FAILED_LOOKUP_TIME` histogram. - - `count_dns_failure`: - the average count of unsuccessful DNS resolutions reported. - - `ssl_error_prop`: - the proportion of users that reported an error through the `SSL_CERT_VERIFICATION_ERRORS` histogram. - - `avg_tls_handshake_time`: - the average time after the TCP SYN to ready for HTTP, in milliseconds. - - Caveats with the data: - As with any observational data, there are many caveats and interpretation must - be done carefully. Below is a list of issues we have considered, but it is not - exhaustive. - - Firefox users are not representative of the general population in their - region. - - Users can experience multiple types of failures and so the proportions - are not summable. For example, if 2.4% of clients had a timeout and 2.6% of - clients had eUnreachable that doesn't necessarily mean that 5.0% of clients - had a timeout or a eUnreachable - - Geographical data is based on IPGeo databases. These databases are - imperfect, so some activity may be attributed to the wrong location. - Further, proxy and VPN usage can create geo-attribution errors. - - Data contains duplicates prior to mid-2022. See [this bug](https://bugzilla.mozilla.org/show_bug.cgi?id=1753470) - for further information. -owners: -- aplacitelli@mozilla.com -labels: - incremental: true - review_bugs: - - '1640204' - dag: bqetl_internet_outages - owner1: aplacitelli -scheduling: - dag_name: bqetl_internet_outages -bigquery: - time_partitioning: - type: day - field: datetime - require_partition_filter: false - expiration_days: null - range_partitioning: null - clustering: - fields: - - country - - city -workgroup_access: -- role: roles/bigquery.dataViewer - members: - - workgroup:mozilla-confidential - - workgroup:internet-outages/external -references: - checks.sql: - - .. - query.sql: - - C - - moz-fx-data-shared-prod.telemetry.clients_daily - - moz-fx-data-shared-prod.telemetry.health - - moz-fx-data-shared-prod.telemetry_stable.main_v5 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/query.sql 2024-07-26 19:22:08.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/query.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1,557 +0,0 @@ --- This sums the values reported by an histogram. -CREATE TEMP FUNCTION sum_values(x ARRAY>) AS ( - ( - WITH a AS ( - SELECT - IF(ARRAY_LENGTH(x) > 0, 1, 0) AS isPres1 - ), - b AS ( - SELECT - -- Truncate pathological values that are beyond the documented limits per - -- https://firefox-source-docs.mozilla.org/toolkit/components/telemetry/collection/histograms.html#histogram-values - SUM(LEAST(2147483648, value)) AS t - FROM - UNNEST(x) - WHERE - key > 0 - ) - SELECT - COALESCE(isPres1 * t, 0) - FROM - a, - b - ) -); - --- This counts how many times an histogram is not present. --- It checks if the histogram is present at all and whether or not it recorded --- any non-zero value. -CREATE TEMP FUNCTION empty(x ARRAY>) AS ( - ( - WITH a AS ( - SELECT - IF(ARRAY_LENGTH(x) = 0, 1, 0) AS isEmpty1 - ), - b AS ( - SELECT - IF(MAX(value) = 0, 1, 0) isEmpty2 - FROM - UNNEST(x) - ), - c AS ( - SELECT - IF(isEmpty2 = 1 OR isEmpty1 = 1, 1, 0) AS Empty - FROM - a, - b - ) - SELECT - Empty - FROM - C - ) -); - --- Get a stable source for DAUs. -WITH DAUs AS ( - SELECT - `moz-fx-data-shared-prod.udf.geo_struct_set_defaults`( - country, - city, - geo_subdivision1, - geo_subdivision2 - ).*, - -- Truncate the submission timestamp to the hour. Note that this filed was - -- introduced on the 16th December 2019, so it will be `null` for queries - -- before that day. See https://github.com/mozilla/bigquery-etl/pull/603 . - TIMESTAMP_TRUNC(submission_timestamp_min, HOUR) AS `datetime`, - COUNT(*) AS client_count - FROM - `moz-fx-data-shared-prod.telemetry.clients_daily` - WHERE - submission_date = @submission_date - -- Country can be null or `??` if geoip lookup failed. - -- There's no point in adding these to the analyses. - -- Due to a bug in `telemetry.clients_daily` we need to - -- check for '??' as well in addition to null. - GROUP BY - country, - city, - geo_subdivision1, - geo_subdivision2, - `datetime` - -- Filter filter out cities for which we have less than or equal to - -- 50 hourly active users. This will make sure data won't end up in - -- the final table. - HAVING - country <> "??" - AND client_count > 50 -), --- Compute aggregates for the health data. -health_data_sample AS ( - SELECT - `moz-fx-data-shared-prod.udf.geo_struct_set_defaults`( - metadata.geo.country, - metadata.geo.city, - metadata.geo.subdivision1, -- returned field gets renamed to geo_subdivision1 - metadata.geo.subdivision2 -- returned field gets renamed to geo_subdivision2 - ).*, - TIMESTAMP_TRUNC(submission_timestamp, HOUR) AS `datetime`, - client_id, - SUM( - COALESCE( - SAFE_CAST(JSON_EXTRACT(additional_properties, '$.payload.sendFailure.undefined') AS INT64), - 0 - ) - ) AS e_undefined, - SUM( - COALESCE( - SAFE_CAST(JSON_EXTRACT(additional_properties, '$.payload.sendFailure.timeout') AS INT64), - 0 - ) - ) AS e_timeout, - SUM( - COALESCE( - SAFE_CAST(JSON_EXTRACT(additional_properties, '$.payload.sendFailure.abort') AS INT64), - 0 - ) - ) AS e_abort, - SUM( - COALESCE( - SAFE_CAST( - JSON_EXTRACT(additional_properties, '$.payload.sendFailure.eUnreachable') AS INT64 - ), - 0 - ) - ) AS e_unreachable, - SUM( - COALESCE( - SAFE_CAST( - JSON_EXTRACT(additional_properties, '$.payload.sendFailure.eTerminated') AS INT64 - ), - 0 - ) - ) AS e_terminated, - SUM( - COALESCE( - SAFE_CAST( - JSON_EXTRACT(additional_properties, '$.payload.sendFailure.eChannelOpen') AS INT64 - ), - 0 - ) - ) AS e_channel_open, - FROM - `moz-fx-data-shared-prod.telemetry.health` - WHERE - DATE(submission_timestamp) = @submission_date - GROUP BY - country, - city, - geo_subdivision1, - geo_subdivision2, - `datetime`, - client_id - HAVING - country <> "??" -), -health_data_aggregates AS ( - SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - `datetime`, - COUNTIF(e_undefined > 0) AS num_clients_e_undefined, - COUNTIF(e_timeout > 0) AS num_clients_e_timeout, - COUNTIF(e_abort > 0) AS num_clients_e_abort, - COUNTIF(e_unreachable > 0) AS num_clients_e_unreachable, - COUNTIF(e_terminated > 0) AS num_clients_e_terminated, - COUNTIF(e_channel_open > 0) AS num_clients_e_channel_open, - FROM - health_data_sample - GROUP BY - country, - city, - geo_subdivision1, - geo_subdivision2, - `datetime` - HAVING - COUNT(*) > 50 -), -final_health_data AS ( - SELECT - h.country, - h.city, - h.geo_subdivision1, - h.geo_subdivision2, - h.datetime, - (num_clients_e_undefined / DAUs.client_count) AS proportion_undefined, - (num_clients_e_timeout / DAUs.client_count) AS proportion_timeout, - (num_clients_e_abort / DAUs.client_count) AS proportion_abort, - (num_clients_e_unreachable / DAUs.client_count) AS proportion_unreachable, - (num_clients_e_terminated / DAUs.client_count) AS proportion_terminated, - (num_clients_e_channel_open / DAUs.client_count) AS proportion_channel_open, - FROM - health_data_aggregates AS h - INNER JOIN - DAUs - USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2) -), --- Compute aggregates for histograms coming from the health ping. -histogram_data_sample AS ( - SELECT - `moz-fx-data-shared-prod.udf.geo_struct_set_defaults`( - metadata.geo.country, - metadata.geo.city, - metadata.geo.subdivision1, -- returned field gets renamed to geo_subdivision1 - metadata.geo.subdivision2 -- returned field gets renamed to geo_subdivision2 - ).*, - client_id, - document_id, - TIMESTAMP_TRUNC(submission_timestamp, HOUR) AS time_slot, - payload.info.subsession_length AS subsession_length, - mozfun.hist.extract(payload.histograms.dns_failed_lookup_time).values AS dns_fail, - mozfun.hist.extract(payload.histograms.dns_lookup_time).values AS dns_success, - mozfun.hist.extract(payload.histograms.ssl_cert_verification_errors).values AS ssl_cert_errors, - mozfun.hist.extract( - payload.processes.content.histograms.http_page_tls_handshake - ).values AS tls_handshake, - FROM - `moz-fx-data-shared-prod.telemetry_stable.main_v5` - WHERE - DATE(submission_timestamp) = @submission_date - -- Restrict to Firefox. - AND normalized_app_name = 'Firefox' - -- Only to pings who seem to represent an active session. - AND payload.info.subsession_length >= 0 - -- Country can be null if geoip lookup failed. - -- There's no point in adding these to the analyses. - AND metadata.geo.country IS NOT NULL -), --- DNS_SUCCESS histogram -dns_success_time AS ( - SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - time_slot AS `datetime`, - EXP(SUM(LOG(key) * count) / SUM(count)) AS value - FROM - ( - SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - client_id, - time_slot, - key, - SUM(LEAST(2147483648, value)) AS count - FROM - histogram_data_sample - CROSS JOIN - UNNEST(histogram_data_sample.dns_success) - GROUP BY - country, - city, - geo_subdivision1, - geo_subdivision2, - time_slot, - client_id, - key - ) - WHERE - key > 0 - GROUP BY - country, - city, - geo_subdivision1, - geo_subdivision2, - `datetime` - HAVING - COUNT(*) > 50 -), --- Oddness: active sessions without DNS_LOOKUP_TIME -dns_no_dns_lookup_time AS ( - SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - time_slot AS `datetime`, - SUM(IF(subsession_length > 0 AND is_empty = 1, 1, 0)) / ( - 1 + SUM(IF(subsession_length > 0, 1, 0)) - ) AS value - FROM - ( - SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - client_id, - time_slot, - subsession_length, - empty(dns_success) AS is_empty - FROM - histogram_data_sample - ) - GROUP BY - country, - city, - geo_subdivision1, - geo_subdivision2, - `datetime` - HAVING - COUNT(*) > 50 -), --- A shared source for the DNS_FAIL histogram -dns_failure_src AS ( - SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - client_id, - time_slot, - key, - SUM(LEAST(2147483648, value)) AS count - FROM - histogram_data_sample - CROSS JOIN - UNNEST(histogram_data_sample.dns_fail) - GROUP BY - country, - city, - geo_subdivision1, - geo_subdivision2, - time_slot, - client_id, - key -), --- DNS_FAIL histogram -dns_failure_time AS ( - SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - time_slot AS `datetime`, - EXP(SUM(LOG(key) * count) / SUM(count)) AS value - FROM - dns_failure_src - WHERE - key > 0 - GROUP BY - country, - city, - geo_subdivision1, - geo_subdivision2, - `datetime` - HAVING - COUNT(*) > 50 -), --- DNS_FAIL counts -dns_failure_counts AS ( - SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - time_slot AS `datetime`, - AVG(count) AS value - FROM - ( - SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - client_id, - time_slot, - SUM(count) AS count - FROM - dns_failure_src - GROUP BY - country, - city, - geo_subdivision1, - geo_subdivision2, - time_slot, - client_id - ) - GROUP BY - country, - city, - geo_subdivision1, - geo_subdivision2, - `datetime` - HAVING - COUNT(*) > 50 -), --- Oddness: active sessions without DNS_FAILED_LOOKUP_TIME -dns_no_dns_failure_time AS ( - SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - time_slot AS `datetime`, - SUM(IF(subsession_length > 0 AND is_empty = 1, 1, 0)) / ( - 1 + SUM(IF(subsession_length > 0, 1, 0)) - ) AS value - FROM - ( - SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - client_id, - time_slot, - subsession_length, - empty(dns_fail) AS is_empty - FROM - histogram_data_sample - ) - GROUP BY - country, - city, - geo_subdivision1, - geo_subdivision2, - `datetime` - HAVING - COUNT(*) > 50 -), --- SSL_CERT_VERIFICATION_ERRORS histograms -ssl_error_prop_src AS ( - SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - time_slot, - client_id, - document_id, - subsession_length, - sum_values(ssl_cert_errors) AS ssl_sum_vals - FROM - histogram_data_sample -), -ssl_error_prop AS ( - SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - time_slot AS `datetime`, - SUM(IF(subsession_length > 0 AND ssl_sum_vals > 0, 1, 0)) / ( - 1 + SUM(IF(subsession_length > 0, 1, 0)) - ) AS value - FROM - ssl_error_prop_src - GROUP BY - country, - city, - geo_subdivision1, - geo_subdivision2, - `datetime` - HAVING - COUNT(*) > 50 -), --- TLS_HANDSHAKE histogram -tls_handshake_time AS ( - SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - time_slot AS `datetime`, - EXP(SUM(LOG(key) * count) / SUM(count)) AS value - FROM - ( - SELECT - country, - city, - client_id, - geo_subdivision1, - geo_subdivision2, - time_slot, - key, - SUM(LEAST(2147483648, value)) AS count - FROM - histogram_data_sample - CROSS JOIN - UNNEST(histogram_data_sample.tls_handshake) - GROUP BY - country, - city, - geo_subdivision1, - geo_subdivision2, - time_slot, - client_id, - key - ) - WHERE - key > 0 - GROUP BY - country, - city, - geo_subdivision1, - geo_subdivision2, - `datetime` - HAVING - COUNT(*) > 50 -) -SELECT - DAUs.country AS country, - -- Caveat: mindmind location mapping returns `??` in cases where - -- the population is less than 15k. As long as the entry has a higher level - -- grouping (country) we still want to keep it. For backwards compatibility, - -- we rename this value to `unknown` to represent a lack of value. - IF(DAUs.city = "??", "unknown", DAUs.city) AS city, - NULLIF(DAUs.geo_subdivision1, "??") AS geo_subdivision1, - NULLIF(DAUs.geo_subdivision2, "??") AS geo_subdivision2, - DAUs.datetime AS `datetime`, - hd.* EXCEPT (`datetime`, country, city, geo_subdivision1, geo_subdivision2), - ds.value AS avg_dns_success_time, - ds_missing.value AS missing_dns_success, - df.value AS avg_dns_failure_time, - df_missing.value AS missing_dns_failure, - dfc.value AS count_dns_failure, - ssl.value AS ssl_error_prop, - tls.value AS avg_tls_handshake_time -FROM - final_health_data AS hd --- We apply LEFT JOIN here and in the other places instead --- of a FULL OUTER JOIN. Since LEFT is DAUs, which should contain --- all the countries and all the days, it should always have matches --- with whatever we pass on the RIGHT. --- When doing a FULL OUTER JOIN, we end up sometimes with nulls on the --- left because there are a few samples coming from telemetry.main that --- are not accounted for in `moz-fx-data-shared-prod.telemetry.clients_daily` -LEFT JOIN - DAUs - USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2) -LEFT JOIN - dns_success_time AS ds - USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2) -LEFT JOIN - dns_no_dns_lookup_time AS ds_missing - USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2) -LEFT JOIN - dns_failure_time AS df - USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2) -LEFT JOIN - dns_failure_counts AS dfc - USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2) -LEFT JOIN - dns_no_dns_failure_time AS df_missing - USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2) -LEFT JOIN - tls_handshake_time AS tls - USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2) -LEFT JOIN - ssl_error_prop AS ssl - USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/schema.yaml 2024-07-26 19:22:08.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/schema.yaml 1970-01-01 00:00:00.000000000 +0000 @@ -1,116 +0,0 @@ -fields: -- mode: NULLABLE - name: country - type: STRING - description: | - The Country code of the client. - -- mode: NULLABLE - name: city - type: STRING - description: | - The City name (only for cities with a population >= 15000, 'unknown' otherwise). - -- mode: NULLABLE - name: geo_subdivision1 - type: STRING - description: | - Country subdivision (1st level) corresponds, for example, to US states, Canadian provinces, - French regions, Spanish autonomous communities, and Japanese prefectures. Geo charts do not support FIPS or NUTS codes. - -- mode: NULLABLE - name: geo_subdivision2 - type: STRING - description: | - Country subdivision (2nd level) corresponds, for example, to US counties, French departments, and Italian and Spanish provinces. - -- mode: NULLABLE - name: datetime - type: TIMESTAMP - description: | - The date and the time (truncated to hour) the data was submitted by the client. - -- mode: NULLABLE - name: proportion_undefined - type: FLOAT - description: | - The proportion of users who failed to send telemetry for a reason that was not listed in the other cases. - -# yamllint disable rule:line-length -- mode: NULLABLE - name: proportion_timeout - type: FLOAT - description: | - The proportion of users that had their connection timeout while uploading telemetry - ([after 90s, in Firefox Desktop](https://searchfox.org/mozilla-central/rev/fa2df28a49883612bd7af4dacd80cdfedcccd2f6/toolkit/components/telemetry/app/TelemetrySend.jsm#81)). -# yamllint enable rule:line-length - -- mode: NULLABLE - name: proportion_abort - type: FLOAT - description: | - The proportion of users that had their connection terminated by the client - (for example, terminating open connections before shutting down). - -- mode: NULLABLE - name: proportion_unreachable - type: FLOAT - description: | - The proportion of users that failed to upload telemetry because the server was not reachable - (e.g. because the host was not reachable, proxy problems or OS waking up after a suspension). - -- mode: NULLABLE - name: proportion_terminated - type: FLOAT - description: | - The proportion of users that had their connection terminated internally by the networking code. - -- mode: NULLABLE - name: proportion_channel_open - type: FLOAT - description: | - The proportion of users for which the upload request was terminated immediately, - by the client,because of a Necko internal error. - -- mode: NULLABLE - name: avg_dns_success_time - type: FLOAT - description: | - The average time it takes for a successful DNS resolution, in milliseconds. - -- mode: NULLABLE - name: missing_dns_success - type: FLOAT - description: | - - Counts how many sessions did not report the `DNS_LOOKUP_TIME` histogram. - -- mode: NULLABLE - name: avg_dns_failure_time - type: FLOAT - description: | - The average time it takes for an unsuccessful DNS resolution, in milliseconds. - -- mode: NULLABLE - name: missing_dns_failure - type: FLOAT - description: | - Counts how many sessions did not report the `DNS_FAILED_LOOKUP_TIME` histogram. - -- mode: NULLABLE - name: count_dns_failure - type: FLOAT - description: | - The average count of unsuccessful DNS resolutions reported. - -- mode: NULLABLE - name: ssl_error_prop - type: FLOAT - description: | - The proportion of users that reported an error through the `SSL_CERT_VERIFICATION_ERRORS` histogram. - -- mode: NULLABLE - name: avg_tls_handshake_time - type: FLOAT - description: | - The average time after the TCP SYN to ready for HTTP, in milliseconds. diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/event_monitoring_live_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/event_monitoring_live_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/event_monitoring_live_v1/metadata.yaml 2024-07-26 19:23:17.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/event_monitoring_live_v1/metadata.yaml 2024-07-26 19:41:12.000000000 +0000 @@ -1,10 +1,19 @@ +friendly_name: Event Monitoring Live +description: |- + Materialized view of experimentation related events + coming from monitor_frontend. +owners: +- ascholtz@mozilla.com +- akomar@mozilla.com +labels: + materialized_view: true + owner1: ascholtz + owner2: akomar +bigquery: null workgroup_access: - role: roles/bigquery.dataViewer members: - workgroup:mozilla-confidential - - workgroup:dataops-managed/external-fides - -# Generated by bigquery_etl.dependency references: materialized_view.sql: - moz-fx-data-shared-prod.monitor_frontend_live.events_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/monitor_dashboard_user_journey_funnels_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/monitor_dashboard_user_journey_funnels_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/monitor_dashboard_user_journey_funnels_v1/metadata.yaml 2024-07-26 19:23:17.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/monitor_dashboard_user_journey_funnels_v1/metadata.yaml 2024-07-26 19:41:12.000000000 +0000 @@ -1,10 +1,26 @@ +friendly_name: Monitor Dashboard User Journey Funnels +description: |- + Please provide a description for the query +owners: +- ksiegler@mozilla.org +labels: + incremental: true + dag: bqetl_generated_funnels + owner1: ksiegler +scheduling: + dag_name: bqetl_generated_funnels +bigquery: + time_partitioning: + type: day + field: submission_date + require_partition_filter: false + expiration_days: null + range_partitioning: null + clustering: null workgroup_access: - role: roles/bigquery.dataViewer members: - workgroup:mozilla-confidential - - workgroup:dataops-managed/external-fides - -# Generated by bigquery_etl.dependency references: query.sql: - mozdata.monitor_frontend.events_unnested diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/org_mozilla_fenix/geckoview_version/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/org_mozilla_fenix/geckoview_version/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/org_mozilla_fenix/geckoview_version/schema.yaml 2024-07-26 19:22:07.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/org_mozilla_fenix/geckoview_version/schema.yaml 2024-07-26 19:31:49.000000000 +0000 @@ -1,7 +1,13 @@ fields: -- type: DATETIME - name: build_hour -- type: INTEGER - name: geckoview_major_version -- type: INTEGER - name: n_pings +- name: build_hour + type: DATETIME + mode: NULLABLE + description: null +- name: geckoview_major_version + type: INTEGER + mode: NULLABLE + description: null +- name: n_pings + type: INTEGER + mode: NULLABLE + description: null diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_derived/mobile_search_clients_daily_v2/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/search_derived/mobile_search_clients_daily_ ```

⚠️ Only part of the diff is displayed.

Link to full diff

dataops-ci-bot commented 2 months ago

Integration report for "Add broken UDF for testing"

sql.diff

Click to expand! ```diff diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_generated_funnels.py /tmp/workspace/generated-sql/dags/bqetl_generated_funnels.py --- /tmp/workspace/main-generated-sql/dags/bqetl_generated_funnels.py 2024-07-26 20:06:50.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_generated_funnels.py 2024-07-26 20:26:02.000000000 +0000 @@ -276,6 +276,21 @@ ) ) + monitor_frontend_derived__monitor_dashboard_user_journey_funnels__v1 = bigquery_etl_query( + task_id="monitor_frontend_derived__monitor_dashboard_user_journey_funnels__v1", + destination_table="monitor_dashboard_user_journey_funnels_v1", + dataset_id="monitor_frontend_derived", + project_id="moz-fx-data-shared-prod", + owner="ksiegler@mozilla.org", + email=[ + "ascholtz@mozilla.com", + "ksiegler@mozilla.org", + "telemetry-alerts@mozilla.com", + ], + date_partition_parameter="submission_date", + depends_on_past=False, + ) + accounts_frontend_derived__email_first_reg_login_funnels__v1.set_upstream( wait_for_copy_deduplicate_all ) @@ -325,3 +340,7 @@ firefox_accounts_derived__registration_funnels_legacy_events__v1.set_upstream( wait_for_firefox_accounts_derived__fxa_stdout_events__v1 ) + + monitor_frontend_derived__monitor_dashboard_user_journey_funnels__v1.set_upstream( + wait_for_copy_deduplicate_all + ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_internet_outages.py /tmp/workspace/generated-sql/dags/bqetl_internet_outages.py --- /tmp/workspace/main-generated-sql/dags/bqetl_internet_outages.py 2024-07-26 20:06:50.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_internet_outages.py 2024-07-26 20:25:57.000000000 +0000 @@ -88,12 +88,12 @@ pool="DATA_ENG_EXTERNALTASKSENSOR", ) - checks__warn_internet_outages__global_outages__v1 = bigquery_dq_check( - task_id="checks__warn_internet_outages__global_outages__v1", + checks__fail_internet_outages__global_outages__v1 = bigquery_dq_check( + task_id="checks__fail_internet_outages__global_outages__v1", source_table="global_outages_v1", dataset_id="internet_outages", project_id="moz-fx-data-shared-prod", - is_dq_check_fail=False, + is_dq_check_fail=True, owner="aplacitelli@mozilla.com", email=["aplacitelli@mozilla.com"], depends_on_past=False, @@ -101,9 +101,9 @@ retries=0, ) - checks__warn_internet_outages__global_outages__v2 = bigquery_dq_check( - task_id="checks__warn_internet_outages__global_outages__v2", - source_table="global_outages_v2", + checks__warn_internet_outages__global_outages__v1 = bigquery_dq_check( + task_id="checks__warn_internet_outages__global_outages__v1", + source_table="global_outages_v1", dataset_id="internet_outages", project_id="moz-fx-data-shared-prod", is_dq_check_fail=False, @@ -125,25 +125,14 @@ depends_on_past=False, ) - internet_outages__global_outages__v2 = bigquery_etl_query( - task_id="internet_outages__global_outages__v2", - destination_table="global_outages_v2", - dataset_id="internet_outages", - project_id="moz-fx-data-shared-prod", - owner="aplacitelli@mozilla.com", - email=["aplacitelli@mozilla.com"], - date_partition_parameter="submission_date", - depends_on_past=False, + checks__fail_internet_outages__global_outages__v1.set_upstream( + internet_outages__global_outages__v1 ) checks__warn_internet_outages__global_outages__v1.set_upstream( internet_outages__global_outages__v1 ) - checks__warn_internet_outages__global_outages__v2.set_upstream( - internet_outages__global_outages__v2 - ) - internet_outages__global_outages__v1.set_upstream(wait_for_copy_deduplicate_all) internet_outages__global_outages__v1.set_upstream( @@ -153,13 +142,3 @@ internet_outages__global_outages__v1.set_upstream( wait_for_telemetry_derived__clients_daily_joined__v1 ) - - internet_outages__global_outages__v2.set_upstream(wait_for_copy_deduplicate_all) - - internet_outages__global_outages__v2.set_upstream( - wait_for_copy_deduplicate_main_ping - ) - - internet_outages__global_outages__v2.set_upstream( - wait_for_telemetry_derived__clients_daily_joined__v1 - ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_mobile_search.py /tmp/workspace/generated-sql/dags/bqetl_mobile_search.py --- /tmp/workspace/main-generated-sql/dags/bqetl_mobile_search.py 2024-07-26 20:06:50.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_mobile_search.py 2024-07-26 20:25:58.000000000 +0000 @@ -158,23 +158,6 @@ search_derived__mobile_search_clients_daily__v1 ) - search_derived__mobile_search_clients_daily__v2 = bigquery_etl_query( - task_id="search_derived__mobile_search_clients_daily__v2", - destination_table="mobile_search_clients_daily_v2", - dataset_id="search_derived", - project_id="moz-fx-data-shared-prod", - owner="cmorales@mozilla.com", - email=[ - "akomar@mozilla.com", - "akommasani@mozilla.com", - "anicholson@mozilla.com", - "cmorales@mozilla.com", - "telemetry-alerts@mozilla.com", - ], - date_partition_parameter="submission_date", - depends_on_past=False, - ) - search_derived__mobile_search_clients_last_seen__v1 = bigquery_etl_query( task_id="search_derived__mobile_search_clients_last_seen__v1", destination_table="mobile_search_clients_last_seen_v1", @@ -199,10 +182,6 @@ wait_for_copy_deduplicate_all ) - search_derived__mobile_search_clients_daily__v2.set_upstream( - wait_for_copy_deduplicate_all - ) - search_derived__mobile_search_clients_last_seen__v1.set_upstream( search_derived__mobile_search_clients_daily__v1 ) Only in /tmp/workspace/generated-sql/sql/mozfun/addons: is_adblocker_broken Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages: global_outages Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages: global_outages_v2 Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_derived: mobile_search_clients_daily_v2 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/metadata.yaml /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/metadata.yaml 2024-07-26 20:01:38.000000000 +0000 @@ -0,0 +1,2 @@ +description: Returns whether a given Addon ID is an adblocker. +friendly_name: Is Adblocker diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/README.md /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/README.md --- /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/README.md 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/README.md 2024-07-26 20:01:38.000000000 +0000 @@ -0,0 +1,16 @@ +Determine if a given Addon ID is for an adblocker. + +As an example, this query will give the number of users who +have an adblocker installed. +``` +SELECT + submission_date, + COUNT(DISTINCT client_id) AS dau, +FROM + mozdata.telemetry.addons +WHERE + mozfun.addons.is_adblocker(addon_id) + AND submission_date >= "2023-01-01" +GROUP BY + submission_date +``` diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/udf.sql /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/udf.sql --- /tmp/workspace/main-generated-sql/sql/mozfun/addons/is_adblocker_broken/udf.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/mozfun/addons/is_adblocker_broken/udf.sql 2024-07-26 20:01:38.000000000 +0000 @@ -0,0 +1,19 @@ +CREATE OR REPLACE FUNCTION addons.is_adblocker_broken(addon_id STRING) +RETURNS BOOLEAN AS ( + addon_id IS NOT NULL + AND addon_id IN ( + 'uBlock0@raymondhill.net', -- 'uBlock_Origin' + '{d10d0bf8-f5b5-c8b4-a8b2-2b9879e08c5d}', -- 'AdBlock_Plus' + 'jid1-NIfFY2CA8fy1tg@jetpack', -- 'Adblock' + 'firefox@ghostery.com', -- 'Ghostery' + 'adblockultimate@adblockultimate.net' -- 'AdBlock_Ultimate' + ) +); + +-- Tests +SELECT + assert.true(addons.is_adblocker_broken('borken')), + assert.true(addons.is_adblocker_broken('adblockultimate@adblockultimate.net')), + assert.false(addons.is_adblocker_broken('failing test')), + assert.false(addons.is_adblocker_broken('')), + assert.false(addons.is_adblocker_broken(NULL)), diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml 2024-07-26 20:00:49.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml 2024-07-26 20:10:32.000000000 +0000 @@ -1,49 +1,49 @@ fields: -- mode: NULLABLE - name: submission_date +- name: submission_date type: DATE -- mode: NULLABLE - name: source + mode: NULLABLE +- name: source type: STRING -- mode: NULLABLE - name: event_type + mode: NULLABLE +- name: event_type type: STRING -- mode: NULLABLE - name: form_factor + mode: NULLABLE +- name: form_factor type: STRING -- mode: NULLABLE - name: country + mode: NULLABLE +- name: country type: STRING -- mode: NULLABLE - name: subdivision1 + mode: NULLABLE +- name: subdivision1 type: STRING -- mode: NULLABLE - name: advertiser + mode: NULLABLE +- name: advertiser type: STRING -- mode: NULLABLE - name: release_channel + mode: NULLABLE +- name: release_channel type: STRING -- mode: NULLABLE - name: position + mode: NULLABLE +- name: position type: INTEGER -- mode: NULLABLE - name: provider + mode: NULLABLE +- name: provider type: STRING -- mode: NULLABLE - name: match_type + mode: NULLABLE +- name: match_type type: STRING -- mode: NULLABLE - name: normalized_os + mode: NULLABLE +- name: normalized_os type: STRING -- mode: NULLABLE - name: suggest_data_sharing_enabled + mode: NULLABLE +- name: suggest_data_sharing_enabled type: BOOLEAN -- mode: NULLABLE - name: event_count + mode: NULLABLE +- name: event_count type: INTEGER -- mode: NULLABLE - name: user_count + mode: NULLABLE +- name: user_count type: INTEGER -- mode: NULLABLE - name: query_type + mode: NULLABLE +- name: query_type type: STRING + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml 2024-07-26 20:00:49.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml 2024-07-26 20:10:32.000000000 +0000 @@ -1,40 +1,40 @@ fields: -- mode: NULLABLE - name: submission_date +- name: submission_date type: DATE -- mode: NULLABLE - name: form_factor + mode: NULLABLE +- name: form_factor type: STRING -- mode: NULLABLE - name: country + mode: NULLABLE +- name: country type: STRING -- mode: NULLABLE - name: advertiser + mode: NULLABLE +- name: advertiser type: STRING -- mode: NULLABLE - name: normalized_os + mode: NULLABLE +- name: normalized_os type: STRING -- mode: NULLABLE - name: release_channel + mode: NULLABLE +- name: release_channel type: STRING -- mode: NULLABLE - name: position + mode: NULLABLE +- name: position type: INTEGER -- mode: NULLABLE - name: provider + mode: NULLABLE +- name: provider type: STRING -- mode: NULLABLE - name: match_type + mode: NULLABLE +- name: match_type type: STRING -- mode: NULLABLE - name: suggest_data_sharing_enabled + mode: NULLABLE +- name: suggest_data_sharing_enabled type: BOOLEAN -- mode: NULLABLE - name: impression_count + mode: NULLABLE +- name: impression_count type: INTEGER -- mode: NULLABLE - name: click_count + mode: NULLABLE +- name: click_count type: INTEGER -- mode: NULLABLE - name: query_type + mode: NULLABLE +- name: query_type type: STRING + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml 2024-07-26 20:00:49.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml 2024-07-26 20:11:42.000000000 +0000 @@ -26,6 +26,9 @@ - name: adjust_network type: STRING mode: NULLABLE +- name: install_source + type: STRING + mode: NULLABLE - name: retained_week_2 type: BOOLEAN mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml 2024-07-26 20:00:49.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml 2024-07-26 20:11:43.000000000 +0000 @@ -48,6 +48,10 @@ description: 'The type of source of a client installation. ' +- name: install_source + type: STRING + mode: NULLABLE + description: null - name: new_profiles type: INTEGER mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml 2024-07-26 20:00:49.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml 2024-07-26 20:10:47.000000000 +0000 @@ -6,9 +6,10 @@ - name: site_url type: STRING mode: NULLABLE - description: |- - For domain properties, this will be `sc-domain:` followed by the domain name. - For URL-prefix properties, it will be the full URL of the property definition. + description: 'For domain properties, this will be `sc-domain:` followed by the domain + name. + + For URL-prefix properties, it will be the full URL of the property definition.' - name: site_domain_name type: STRING mode: NULLABLE @@ -16,103 +17,110 @@ - name: page_url type: STRING mode: NULLABLE - description: |- - The final page URL linked by a search result after any skip redirects. - This will be null for anonymized Discover impressions. + description: 'The final page URL linked by a search result after any skip redirects. + + This will be null for anonymized Discover impressions.' - name: page_domain_name type: STRING mode: NULLABLE - description: |- - Domain name of the page URL. - This will be null for anonymized Discover impressions. + description: 'Domain name of the page URL. + + This will be null for anonymized Discover impressions.' - name: page_path type: STRING mode: NULLABLE - description: |- - The path part of the page URL. - This will be null for anonymized Discover impressions. + description: 'The path part of the page URL. + + This will be null for anonymized Discover impressions.' - name: localized_site_code type: STRING mode: NULLABLE - description: |- - Localized site code such as `en-US` or `de` found in the first segment of the page URL path (if any). - This will be null for anonymized Discover impressions. + description: 'Localized site code such as `en-US` or `de` found in the first segment + of the page URL path (if any). + + This will be null for anonymized Discover impressions.' - name: localized_site type: STRING mode: NULLABLE - description: |- - Description of the localized site language and/or country based on `localized_site_code` (if any). - This will be null for anonymized Discover impressions. + description: 'Description of the localized site language and/or country based on + `localized_site_code` (if any). + + This will be null for anonymized Discover impressions.' - name: localized_site_language_code type: STRING mode: NULLABLE - description: |- - Localized site language code in ISO-639-alpha-2 format found in the first segment of the page URL path (if any). - This will be null for anonymized Discover impressions. + description: 'Localized site language code in ISO-639-alpha-2 format found in the + first segment of the page URL path (if any). + + This will be null for anonymized Discover impressions.' - name: localized_site_language type: STRING mode: NULLABLE - description: |- - Localized site language based on `localized_site_language_code` (if any). - This will be null for anonymized Discover impressions. + description: 'Localized site language based on `localized_site_language_code` (if + any). + + This will be null for anonymized Discover impressions.' - name: query type: STRING mode: NULLABLE - description: |- - The search query. - This will be null for anonymized search impressions, and all Discover and Google News search impressions. + description: 'The search query. + + This will be null for anonymized search impressions, and all Discover and Google + News search impressions.' - name: query_type type: STRING mode: NULLABLE - description: |- - Type of search query: - * Anonymized: Query was redacted by Google to protect the users' privacy. - * Brand: Query contained one or more Mozilla brand keywords. - * Non-Brand: Query didn't contain any Mozilla brand keywords. - * Unknown: Query couldn't be classified. - This will be null for all Discover and Google News search impressions. + description: "Type of search query:\n * Anonymized: Query was redacted by Google\ + \ to protect the users' privacy.\n * Brand: Query contained one or more Mozilla\ + \ brand keywords.\n * Non-Brand: Query didn't contain any Mozilla brand keywords.\n\ + \ * Unknown: Query couldn't be classified.\nThis will be null for all Discover\ + \ and Google News search impressions." - name: is_anonymized type: BOOLEAN mode: NULLABLE - description: |- - Whether Google has anonymized the search impression to protect the users' privacy. + description: 'Whether Google has anonymized the search impression to protect the + users'' privacy. + The `query` field will be null for anonymized search impressions. - The `country_code`, `page_url`, and related fields will be null for anonymized Discover impressions. + + The `country_code`, `page_url`, and related fields will be null for anonymized + Discover impressions.' - name: has_good_page_experience type: BOOLEAN mode: NULLABLE - description: |- - Whether Google Search considers the page to be providing a good page experience. - This will be null when the source data wasn't exported directly to BigQuery by Google. + description: 'Whether Google Search considers the page to be providing a good page + experience. + + This will be null when the source data wasn''t exported directly to BigQuery by + Google.' - name: search_type type: STRING mode: NULLABLE - description: |- - Where the link was seen by the user: - * Web: In Google Search's default "All" tab. - * Image: In Google Search's "Images" tab. - * Video: In Google Search's "Videos" tab. - * News: In Google Search's "News" tab. - * Discover: In Google's Discover feed. - * Google News: On news.google.com or in the Google News app on Android and iOS. + description: "Where the link was seen by the user:\n * Web: In Google Search's\ + \ default \"All\" tab.\n * Image: In Google Search's \"Images\" tab.\n * Video:\ + \ In Google Search's \"Videos\" tab.\n * News: In Google Search's \"News\" tab.\n\ + \ * Discover: In Google's Discover feed.\n * Google News: On news.google.com\ + \ or in the Google News app on Android and iOS." - name: search_appearance type: STRING mode: NULLABLE - description: |- - How the search result appeared (e.g. normal result, translated result, video). - This will be null when the source data wasn't exported directly to BigQuery by Google. + description: 'How the search result appeared (e.g. normal result, translated result, + video). + + This will be null when the source data wasn''t exported directly to BigQuery by + Google.' - name: user_country_code type: STRING mode: NULLABLE - description: |- - Country from which the user was searching, in ISO-3166-1-alpha-3 format. - This will be null for anonymized Discover impressions. + description: 'Country from which the user was searching, in ISO-3166-1-alpha-3 format. + + This will be null for anonymized Discover impressions.' - name: user_country type: STRING mode: NULLABLE - description: |- - Country from which the user was searching. - This will be null for anonymized Discover impressions. + description: 'Country from which the user was searching. + + This will be null for anonymized Discover impressions.' - name: user_region type: STRING mode: NULLABLE @@ -124,13 +132,15 @@ - name: device_type type: STRING mode: NULLABLE - description: |- - The type of device on which the user was searching: Desktop, Mobile, or Tablet. - This will be null for Discover impressions. + description: 'The type of device on which the user was searching: Desktop, Mobile, + or Tablet. + + This will be null for Discover impressions.' - name: impressions type: INTEGER mode: NULLABLE - description: The number of times that search results with a link to the page were shown to a user. + description: The number of times that search results with a link to the page were + shown to a user. - name: clicks type: INTEGER mode: NULLABLE @@ -138,6 +148,7 @@ - name: average_position type: FLOAT mode: NULLABLE - description: |- - The average position of the page in the search results, where `1` is the topmost position. - This will be null for Discover and Google News search impressions. + description: 'The average position of the page in the search results, where `1` + is the topmost position. + + This will be null for Discover and Google News search impressions.' diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/schema.yaml 2024-07-26 20:00:49.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/schema.yaml 2024-07-26 20:10:48.000000000 +0000 @@ -6,9 +6,10 @@ - name: site_url type: STRING mode: NULLABLE - description: |- - For domain properties, this will be `sc-domain:` followed by the domain name. - For URL-prefix properties, it will be the full URL of the property definition. + description: 'For domain properties, this will be `sc-domain:` followed by the domain + name. + + For URL-prefix properties, it will be the full URL of the property definition.' - name: site_domain_name type: STRING mode: NULLABLE @@ -20,27 +21,23 @@ - name: query_type type: STRING mode: NULLABLE - description: |- - Type of search query: - * Anonymized: Query was redacted by Google to protect the users' privacy. - * Brand: Query contained one or more Mozilla brand keywords. - * Non-Brand: Query didn't contain any Mozilla brand keywords. - * Unknown: Query couldn't be classified. + description: "Type of search query:\n * Anonymized: Query was redacted by Google\ + \ to protect the users' privacy.\n * Brand: Query contained one or more Mozilla\ + \ brand keywords.\n * Non-Brand: Query didn't contain any Mozilla brand keywords.\n\ + \ * Unknown: Query couldn't be classified." - name: is_anonymized type: BOOLEAN mode: NULLABLE - description: |- - Whether Google has anonymized the search impression to protect the users' privacy. - The `query` field will be null for anonymized search impressions. + description: 'Whether Google has anonymized the search impression to protect the + users'' privacy. + + The `query` field will be null for anonymized search impressions.' - name: search_type type: STRING mode: NULLABLE - description: |- - Where the link was seen by the user: - * Web: In Google Search's default "All" tab. - * Image: In Google Search's "Images" tab. - * Video: In Google Search's "Videos" tab. - * News: In Google Search's "News" tab. + description: "Where the link was seen by the user:\n * Web: In Google Search's\ + \ default \"All\" tab.\n * Image: In Google Search's \"Images\" tab.\n * Video:\ + \ In Google Search's \"Videos\" tab.\n * News: In Google Search's \"News\" tab." - name: user_country_code type: STRING mode: NULLABLE @@ -60,17 +57,20 @@ - name: device_type type: STRING mode: NULLABLE - description: |- - The type of device on which the user was searching: Desktop, Mobile, or Tablet. + description: 'The type of device on which the user was searching: Desktop, Mobile, + or Tablet.' - name: impressions type: INTEGER mode: NULLABLE - description: The number of times that search results with at least one link to the site were shown to a user. + description: The number of times that search results with at least one link to the + site were shown to a user. - name: clicks type: INTEGER mode: NULLABLE - description: The number of times a user clicked at least one search result link to the site. + description: The number of times a user clicked at least one search result link + to the site. - name: average_top_position type: FLOAT mode: NULLABLE - description: The average top position of the site in the search results, where `1` is the topmost position. + description: The average top position of the site in the search results, where `1` + is the topmost position. diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/metadata.yaml 2024-07-26 20:01:25.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 @@ -1,15 +0,0 @@ -friendly_name: Global Outages -description: |- - Please provide a description for the query -owners: [] -labels: {} -bigquery: null -workgroup_access: -- role: roles/bigquery.dataViewer - members: - - workgroup:mozilla-confidential - - workgroup:internet-outages/external -references: - view.sql: - - moz-fx-data-shared-prod.internet_outages.global_outages_v1 - - moz-fx-data-shared-prod.internet_outages.global_outages_v2 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/schema.yaml 2024-07-26 20:01:25.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/schema.yaml 1970-01-01 00:00:00.000000000 +0000 @@ -1,58 +0,0 @@ -fields: -- name: country - type: STRING - mode: NULLABLE -- name: city - type: STRING - mode: NULLABLE -- name: geo_subdivision1 - type: STRING - mode: NULLABLE -- name: geo_subdivision2 - type: STRING - mode: NULLABLE -- name: datetime - type: TIMESTAMP - mode: NULLABLE -- name: proportion_undefined - type: FLOAT - mode: NULLABLE -- name: proportion_timeout - type: FLOAT - mode: NULLABLE -- name: proportion_abort - type: FLOAT - mode: NULLABLE -- name: proportion_unreachable - type: FLOAT - mode: NULLABLE -- name: proportion_terminated - type: FLOAT - mode: NULLABLE -- name: proportion_channel_open - type: FLOAT - mode: NULLABLE -- name: avg_dns_success_time - type: FLOAT - mode: NULLABLE -- name: missing_dns_success - type: FLOAT - mode: NULLABLE -- name: avg_dns_failure_time - type: FLOAT - mode: NULLABLE -- name: missing_dns_failure - type: FLOAT - mode: NULLABLE -- name: count_dns_failure - type: FLOAT - mode: NULLABLE -- name: ssl_error_prop - type: FLOAT - mode: NULLABLE -- name: avg_tls_handshake_time - type: FLOAT - mode: NULLABLE -- name: dataset_version - type: STRING - mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/view.sql 2024-07-26 20:00:50.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages/view.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1,52 +0,0 @@ -CREATE OR REPLACE VIEW - `moz-fx-data-shared-prod.internet_outages.global_outages` -AS -SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - `datetime`, - proportion_undefined, - proportion_timeout, - proportion_abort, - proportion_unreachable, - proportion_terminated, - proportion_channel_open, - avg_dns_success_time, - missing_dns_success, - avg_dns_failure_time, - missing_dns_failure, - count_dns_failure, - ssl_error_prop, - avg_tls_handshake_time, - "v2" AS dataset_version, -FROM - `moz-fx-data-shared-prod.internet_outages.global_outages_v2` -WHERE - DATE(`datetime`) >= "2024-07-26" -UNION ALL -SELECT - country, - city, - CAST(NULL AS STRING) AS geo_subdivision1, - CAST(NULL AS STRING) AS geo_subdivision2, - `datetime`, - proportion_undefined, - proportion_timeout, - proportion_abort, - proportion_unreachable, - proportion_terminated, - proportion_channel_open, - avg_dns_success_time, - missing_dns_success, - avg_dns_failure_time, - missing_dns_failure, - count_dns_failure, - ssl_error_prop, - avg_tls_handshake_time, - "v1" AS dataset_version, -FROM - `moz-fx-data-shared-prod.internet_outages.global_outages_v1` -WHERE - DATE(`datetime`) < "2024-07-26" diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/checks.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/checks.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/checks.sql 2024-07-26 20:00:50.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/checks.sql 2024-07-26 20:01:38.000000000 +0000 @@ -1,10 +1,10 @@ -#warn +#fail {{ min_row_count(1000, where="DATE(datetime) = @submission_date") }} -#warn +#fail {{ is_unique(columns=["datetime", "city", "country"], where="DATE(`datetime`) = @submission_date") }} -#warn +#fail /* This statement used to contain the following fields, but these are sometimes missing from country/city combinations diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/metadata.yaml 2024-07-26 20:02:42.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v1/metadata.yaml 2024-07-26 20:20:57.000000000 +0000 @@ -1,4 +1,4 @@ -friendly_name: Internet Outages (Version 1) +friendly_name: Internet Outages description: |- This contains a set aggregated metrics that correlate to internet outages for different countries in the world. diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/checks.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/checks.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/checks.sql 2024-07-26 20:00:50.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/checks.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1,38 +0,0 @@ -{% set _WHERE = '@submission_date = @submission_date' %} - -#warn -{{ min_row_count(1000, where=_WHERE) }} - -#warn -{{ is_unique(columns=["country", "city", "geo_subdivision1", "geo_subdivision2", "datetime"], where=_WHERE) }} - -#warn -/* - This statement used to contain the following fields, - but these are sometimes missing from country/city combinations - See https://sql.telemetry.mozilla.org/queries/96541/source - and bug 1868674 - - "avg_tls_handshake_time" - "count_dns_failure" -*/ -{{ not_null(columns=[ - "country", - "city", - "datetime", - "proportion_undefined", - "proportion_timeout", - "proportion_abort", - "proportion_unreachable", - "proportion_terminated", - "proportion_channel_open", - "avg_dns_success_time", - "missing_dns_success", - "avg_dns_failure_time", - "missing_dns_failure", - "ssl_error_prop", - -], where=_WHERE) }} - -#warn -{{ value_length(column="country", expected_length=2, where=_WHERE) }} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/metadata.yaml 2024-07-26 20:02:42.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 @@ -1,98 +0,0 @@ -friendly_name: Internet Outages (version 2) -description: |- - Note: Version 2 contains two additional fields: geo_subdivision1 && geo_subdivision2 for more accurate data grouping - and the data available starting 2024/07/26. - - This contains a set aggregated metrics that correlate to internet - outages for different countries in the world. - - The dataset contains the following fields: - - `country`: - the Country code of the client. - - `city`: - the City name (only for cities with a population >= 15000, 'unknown' otherwise). - - `geo_subdivision1`: - Country subdivision (1st level) corresponds, for example, to US states, Canadian provinces, - French regions, Spanish autonomous communities, and Japanese prefectures. Geo charts do not support FIPS or NUTS codes. - - `geo_subdivision2`: - Country subdivision (2nd level) corresponds, for example, to US counties, French departments, and Italian and Spanish provinces. - - `datetime`: - the date and the time (truncated to hour) the data was submitted by the client. - - `proportion_undefined`: - the proportion of users who failed to send telemetry for a reason that was not listed in the other cases. - - `proportion_timeout`: - the proportion of users that had their connection timeout while uploading telemetry - ([after 90s, in Firefox Desktop](https://searchfox.org/mozilla-central/rev/fa2df28a49883612bd7af4dacd80cdfedcccd2f6/toolkit/components/telemetry/app/TelemetrySend.jsm#81)). - - `proportion_abort`: - the proportion of users that had their connection terminated by the client (for example, terminating open connections before shutting down). - - `proportion_unreachable`: - the proportion of users that failed to upload telemetry because the server was not reachable - (e.g. because the host was not reachable, proxy problems or OS waking up after a suspension). - - `proportion_terminated`: - the proportion of users that had their connection terminated internally by the networking code. - - `proportion_channel_open`: - the proportion of users for which the upload request was terminated immediately, by the client, because of a Necko internal error. - - `avg_dns_success_time`: - the average time it takes for a successful DNS resolution, in milliseconds. - - `missing_dns_success`: - counts how many sessions did not report the `DNS_LOOKUP_TIME` histogram. - - `avg_dns_failure_time`: - the average time it takes for an unsuccessful DNS resolution, in milliseconds. - - `missing_dns_failure`: - counts how many sessions did not report the `DNS_FAILED_LOOKUP_TIME` histogram. - - `count_dns_failure`: - the average count of unsuccessful DNS resolutions reported. - - `ssl_error_prop`: - the proportion of users that reported an error through the `SSL_CERT_VERIFICATION_ERRORS` histogram. - - `avg_tls_handshake_time`: - the average time after the TCP SYN to ready for HTTP, in milliseconds. - - Caveats with the data: - As with any observational data, there are many caveats and interpretation must - be done carefully. Below is a list of issues we have considered, but it is not - exhaustive. - - Firefox users are not representative of the general population in their - region. - - Users can experience multiple types of failures and so the proportions - are not summable. For example, if 2.4% of clients had a timeout and 2.6% of - clients had eUnreachable that doesn't necessarily mean that 5.0% of clients - had a timeout or a eUnreachable - - Geographical data is based on IPGeo databases. These databases are - imperfect, so some activity may be attributed to the wrong location. - Further, proxy and VPN usage can create geo-attribution errors. - - Data contains duplicates prior to mid-2022. See [this bug](https://bugzilla.mozilla.org/show_bug.cgi?id=1753470) - for further information. -owners: -- aplacitelli@mozilla.com -labels: - incremental: true - review_bugs: - - '1640204' - dag: bqetl_internet_outages - owner1: aplacitelli -scheduling: - dag_name: bqetl_internet_outages -bigquery: - time_partitioning: - type: day - field: datetime - require_partition_filter: false - expiration_days: null - range_partitioning: null - clustering: - fields: - - country - - city -workgroup_access: -- role: roles/bigquery.dataViewer - members: - - workgroup:mozilla-confidential - - workgroup:internet-outages/external -references: - checks.sql: - - .. - query.sql: - - C - - moz-fx-data-shared-prod.telemetry.clients_daily - - moz-fx-data-shared-prod.telemetry.health - - moz-fx-data-shared-prod.telemetry_stable.main_v5 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/query.sql 2024-07-26 20:00:50.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/query.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1,557 +0,0 @@ --- This sums the values reported by an histogram. -CREATE TEMP FUNCTION sum_values(x ARRAY>) AS ( - ( - WITH a AS ( - SELECT - IF(ARRAY_LENGTH(x) > 0, 1, 0) AS isPres1 - ), - b AS ( - SELECT - -- Truncate pathological values that are beyond the documented limits per - -- https://firefox-source-docs.mozilla.org/toolkit/components/telemetry/collection/histograms.html#histogram-values - SUM(LEAST(2147483648, value)) AS t - FROM - UNNEST(x) - WHERE - key > 0 - ) - SELECT - COALESCE(isPres1 * t, 0) - FROM - a, - b - ) -); - --- This counts how many times an histogram is not present. --- It checks if the histogram is present at all and whether or not it recorded --- any non-zero value. -CREATE TEMP FUNCTION empty(x ARRAY>) AS ( - ( - WITH a AS ( - SELECT - IF(ARRAY_LENGTH(x) = 0, 1, 0) AS isEmpty1 - ), - b AS ( - SELECT - IF(MAX(value) = 0, 1, 0) isEmpty2 - FROM - UNNEST(x) - ), - c AS ( - SELECT - IF(isEmpty2 = 1 OR isEmpty1 = 1, 1, 0) AS Empty - FROM - a, - b - ) - SELECT - Empty - FROM - C - ) -); - --- Get a stable source for DAUs. -WITH DAUs AS ( - SELECT - `moz-fx-data-shared-prod.udf.geo_struct_set_defaults`( - country, - city, - geo_subdivision1, - geo_subdivision2 - ).*, - -- Truncate the submission timestamp to the hour. Note that this filed was - -- introduced on the 16th December 2019, so it will be `null` for queries - -- before that day. See https://github.com/mozilla/bigquery-etl/pull/603 . - TIMESTAMP_TRUNC(submission_timestamp_min, HOUR) AS `datetime`, - COUNT(*) AS client_count - FROM - `moz-fx-data-shared-prod.telemetry.clients_daily` - WHERE - submission_date = @submission_date - -- Country can be null or `??` if geoip lookup failed. - -- There's no point in adding these to the analyses. - -- Due to a bug in `telemetry.clients_daily` we need to - -- check for '??' as well in addition to null. - GROUP BY - country, - city, - geo_subdivision1, - geo_subdivision2, - `datetime` - -- Filter filter out cities for which we have less than or equal to - -- 50 hourly active users. This will make sure data won't end up in - -- the final table. - HAVING - country <> "??" - AND client_count > 50 -), --- Compute aggregates for the health data. -health_data_sample AS ( - SELECT - `moz-fx-data-shared-prod.udf.geo_struct_set_defaults`( - metadata.geo.country, - metadata.geo.city, - metadata.geo.subdivision1, -- returned field gets renamed to geo_subdivision1 - metadata.geo.subdivision2 -- returned field gets renamed to geo_subdivision2 - ).*, - TIMESTAMP_TRUNC(submission_timestamp, HOUR) AS `datetime`, - client_id, - SUM( - COALESCE( - SAFE_CAST(JSON_EXTRACT(additional_properties, '$.payload.sendFailure.undefined') AS INT64), - 0 - ) - ) AS e_undefined, - SUM( - COALESCE( - SAFE_CAST(JSON_EXTRACT(additional_properties, '$.payload.sendFailure.timeout') AS INT64), - 0 - ) - ) AS e_timeout, - SUM( - COALESCE( - SAFE_CAST(JSON_EXTRACT(additional_properties, '$.payload.sendFailure.abort') AS INT64), - 0 - ) - ) AS e_abort, - SUM( - COALESCE( - SAFE_CAST( - JSON_EXTRACT(additional_properties, '$.payload.sendFailure.eUnreachable') AS INT64 - ), - 0 - ) - ) AS e_unreachable, - SUM( - COALESCE( - SAFE_CAST( - JSON_EXTRACT(additional_properties, '$.payload.sendFailure.eTerminated') AS INT64 - ), - 0 - ) - ) AS e_terminated, - SUM( - COALESCE( - SAFE_CAST( - JSON_EXTRACT(additional_properties, '$.payload.sendFailure.eChannelOpen') AS INT64 - ), - 0 - ) - ) AS e_channel_open, - FROM - `moz-fx-data-shared-prod.telemetry.health` - WHERE - DATE(submission_timestamp) = @submission_date - GROUP BY - country, - city, - geo_subdivision1, - geo_subdivision2, - `datetime`, - client_id - HAVING - country <> "??" -), -health_data_aggregates AS ( - SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - `datetime`, - COUNTIF(e_undefined > 0) AS num_clients_e_undefined, - COUNTIF(e_timeout > 0) AS num_clients_e_timeout, - COUNTIF(e_abort > 0) AS num_clients_e_abort, - COUNTIF(e_unreachable > 0) AS num_clients_e_unreachable, - COUNTIF(e_terminated > 0) AS num_clients_e_terminated, - COUNTIF(e_channel_open > 0) AS num_clients_e_channel_open, - FROM - health_data_sample - GROUP BY - country, - city, - geo_subdivision1, - geo_subdivision2, - `datetime` - HAVING - COUNT(*) > 50 -), -final_health_data AS ( - SELECT - h.country, - h.city, - h.geo_subdivision1, - h.geo_subdivision2, - h.datetime, - (num_clients_e_undefined / DAUs.client_count) AS proportion_undefined, - (num_clients_e_timeout / DAUs.client_count) AS proportion_timeout, - (num_clients_e_abort / DAUs.client_count) AS proportion_abort, - (num_clients_e_unreachable / DAUs.client_count) AS proportion_unreachable, - (num_clients_e_terminated / DAUs.client_count) AS proportion_terminated, - (num_clients_e_channel_open / DAUs.client_count) AS proportion_channel_open, - FROM - health_data_aggregates AS h - INNER JOIN - DAUs - USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2) -), --- Compute aggregates for histograms coming from the health ping. -histogram_data_sample AS ( - SELECT - `moz-fx-data-shared-prod.udf.geo_struct_set_defaults`( - metadata.geo.country, - metadata.geo.city, - metadata.geo.subdivision1, -- returned field gets renamed to geo_subdivision1 - metadata.geo.subdivision2 -- returned field gets renamed to geo_subdivision2 - ).*, - client_id, - document_id, - TIMESTAMP_TRUNC(submission_timestamp, HOUR) AS time_slot, - payload.info.subsession_length AS subsession_length, - mozfun.hist.extract(payload.histograms.dns_failed_lookup_time).values AS dns_fail, - mozfun.hist.extract(payload.histograms.dns_lookup_time).values AS dns_success, - mozfun.hist.extract(payload.histograms.ssl_cert_verification_errors).values AS ssl_cert_errors, - mozfun.hist.extract( - payload.processes.content.histograms.http_page_tls_handshake - ).values AS tls_handshake, - FROM - `moz-fx-data-shared-prod.telemetry_stable.main_v5` - WHERE - DATE(submission_timestamp) = @submission_date - -- Restrict to Firefox. - AND normalized_app_name = 'Firefox' - -- Only to pings who seem to represent an active session. - AND payload.info.subsession_length >= 0 - -- Country can be null if geoip lookup failed. - -- There's no point in adding these to the analyses. - AND metadata.geo.country IS NOT NULL -), --- DNS_SUCCESS histogram -dns_success_time AS ( - SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - time_slot AS `datetime`, - EXP(SUM(LOG(key) * count) / SUM(count)) AS value - FROM - ( - SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - client_id, - time_slot, - key, - SUM(LEAST(2147483648, value)) AS count - FROM - histogram_data_sample - CROSS JOIN - UNNEST(histogram_data_sample.dns_success) - GROUP BY - country, - city, - geo_subdivision1, - geo_subdivision2, - time_slot, - client_id, - key - ) - WHERE - key > 0 - GROUP BY - country, - city, - geo_subdivision1, - geo_subdivision2, - `datetime` - HAVING - COUNT(*) > 50 -), --- Oddness: active sessions without DNS_LOOKUP_TIME -dns_no_dns_lookup_time AS ( - SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - time_slot AS `datetime`, - SUM(IF(subsession_length > 0 AND is_empty = 1, 1, 0)) / ( - 1 + SUM(IF(subsession_length > 0, 1, 0)) - ) AS value - FROM - ( - SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - client_id, - time_slot, - subsession_length, - empty(dns_success) AS is_empty - FROM - histogram_data_sample - ) - GROUP BY - country, - city, - geo_subdivision1, - geo_subdivision2, - `datetime` - HAVING - COUNT(*) > 50 -), --- A shared source for the DNS_FAIL histogram -dns_failure_src AS ( - SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - client_id, - time_slot, - key, - SUM(LEAST(2147483648, value)) AS count - FROM - histogram_data_sample - CROSS JOIN - UNNEST(histogram_data_sample.dns_fail) - GROUP BY - country, - city, - geo_subdivision1, - geo_subdivision2, - time_slot, - client_id, - key -), --- DNS_FAIL histogram -dns_failure_time AS ( - SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - time_slot AS `datetime`, - EXP(SUM(LOG(key) * count) / SUM(count)) AS value - FROM - dns_failure_src - WHERE - key > 0 - GROUP BY - country, - city, - geo_subdivision1, - geo_subdivision2, - `datetime` - HAVING - COUNT(*) > 50 -), --- DNS_FAIL counts -dns_failure_counts AS ( - SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - time_slot AS `datetime`, - AVG(count) AS value - FROM - ( - SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - client_id, - time_slot, - SUM(count) AS count - FROM - dns_failure_src - GROUP BY - country, - city, - geo_subdivision1, - geo_subdivision2, - time_slot, - client_id - ) - GROUP BY - country, - city, - geo_subdivision1, - geo_subdivision2, - `datetime` - HAVING - COUNT(*) > 50 -), --- Oddness: active sessions without DNS_FAILED_LOOKUP_TIME -dns_no_dns_failure_time AS ( - SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - time_slot AS `datetime`, - SUM(IF(subsession_length > 0 AND is_empty = 1, 1, 0)) / ( - 1 + SUM(IF(subsession_length > 0, 1, 0)) - ) AS value - FROM - ( - SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - client_id, - time_slot, - subsession_length, - empty(dns_fail) AS is_empty - FROM - histogram_data_sample - ) - GROUP BY - country, - city, - geo_subdivision1, - geo_subdivision2, - `datetime` - HAVING - COUNT(*) > 50 -), --- SSL_CERT_VERIFICATION_ERRORS histograms -ssl_error_prop_src AS ( - SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - time_slot, - client_id, - document_id, - subsession_length, - sum_values(ssl_cert_errors) AS ssl_sum_vals - FROM - histogram_data_sample -), -ssl_error_prop AS ( - SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - time_slot AS `datetime`, - SUM(IF(subsession_length > 0 AND ssl_sum_vals > 0, 1, 0)) / ( - 1 + SUM(IF(subsession_length > 0, 1, 0)) - ) AS value - FROM - ssl_error_prop_src - GROUP BY - country, - city, - geo_subdivision1, - geo_subdivision2, - `datetime` - HAVING - COUNT(*) > 50 -), --- TLS_HANDSHAKE histogram -tls_handshake_time AS ( - SELECT - country, - city, - geo_subdivision1, - geo_subdivision2, - time_slot AS `datetime`, - EXP(SUM(LOG(key) * count) / SUM(count)) AS value - FROM - ( - SELECT - country, - city, - client_id, - geo_subdivision1, - geo_subdivision2, - time_slot, - key, - SUM(LEAST(2147483648, value)) AS count - FROM - histogram_data_sample - CROSS JOIN - UNNEST(histogram_data_sample.tls_handshake) - GROUP BY - country, - city, - geo_subdivision1, - geo_subdivision2, - time_slot, - client_id, - key - ) - WHERE - key > 0 - GROUP BY - country, - city, - geo_subdivision1, - geo_subdivision2, - `datetime` - HAVING - COUNT(*) > 50 -) -SELECT - DAUs.country AS country, - -- Caveat: mindmind location mapping returns `??` in cases where - -- the population is less than 15k. As long as the entry has a higher level - -- grouping (country) we still want to keep it. For backwards compatibility, - -- we rename this value to `unknown` to represent a lack of value. - IF(DAUs.city = "??", "unknown", DAUs.city) AS city, - NULLIF(DAUs.geo_subdivision1, "??") AS geo_subdivision1, - NULLIF(DAUs.geo_subdivision2, "??") AS geo_subdivision2, - DAUs.datetime AS `datetime`, - hd.* EXCEPT (`datetime`, country, city, geo_subdivision1, geo_subdivision2), - ds.value AS avg_dns_success_time, - ds_missing.value AS missing_dns_success, - df.value AS avg_dns_failure_time, - df_missing.value AS missing_dns_failure, - dfc.value AS count_dns_failure, - ssl.value AS ssl_error_prop, - tls.value AS avg_tls_handshake_time -FROM - final_health_data AS hd --- We apply LEFT JOIN here and in the other places instead --- of a FULL OUTER JOIN. Since LEFT is DAUs, which should contain --- all the countries and all the days, it should always have matches --- with whatever we pass on the RIGHT. --- When doing a FULL OUTER JOIN, we end up sometimes with nulls on the --- left because there are a few samples coming from telemetry.main that --- are not accounted for in `moz-fx-data-shared-prod.telemetry.clients_daily` -LEFT JOIN - DAUs - USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2) -LEFT JOIN - dns_success_time AS ds - USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2) -LEFT JOIN - dns_no_dns_lookup_time AS ds_missing - USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2) -LEFT JOIN - dns_failure_time AS df - USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2) -LEFT JOIN - dns_failure_counts AS dfc - USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2) -LEFT JOIN - dns_no_dns_failure_time AS df_missing - USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2) -LEFT JOIN - tls_handshake_time AS tls - USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2) -LEFT JOIN - ssl_error_prop AS ssl - USING (`datetime`, country, city, geo_subdivision1, geo_subdivision2) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/schema.yaml 2024-07-26 20:00:50.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/internet_outages/global_outages_v2/schema.yaml 1970-01-01 00:00:00.000000000 +0000 @@ -1,116 +0,0 @@ -fields: -- mode: NULLABLE - name: country - type: STRING - description: | - The Country code of the client. - -- mode: NULLABLE - name: city - type: STRING - description: | - The City name (only for cities with a population >= 15000, 'unknown' otherwise). - -- mode: NULLABLE - name: geo_subdivision1 - type: STRING - description: | - Country subdivision (1st level) corresponds, for example, to US states, Canadian provinces, - French regions, Spanish autonomous communities, and Japanese prefectures. Geo charts do not support FIPS or NUTS codes. - -- mode: NULLABLE - name: geo_subdivision2 - type: STRING - description: | - Country subdivision (2nd level) corresponds, for example, to US counties, French departments, and Italian and Spanish provinces. - -- mode: NULLABLE - name: datetime - type: TIMESTAMP - description: | - The date and the time (truncated to hour) the data was submitted by the client. - -- mode: NULLABLE - name: proportion_undefined - type: FLOAT - description: | - The proportion of users who failed to send telemetry for a reason that was not listed in the other cases. - -# yamllint disable rule:line-length -- mode: NULLABLE - name: proportion_timeout - type: FLOAT - description: | - The proportion of users that had their connection timeout while uploading telemetry - ([after 90s, in Firefox Desktop](https://searchfox.org/mozilla-central/rev/fa2df28a49883612bd7af4dacd80cdfedcccd2f6/toolkit/components/telemetry/app/TelemetrySend.jsm#81)). -# yamllint enable rule:line-length - -- mode: NULLABLE - name: proportion_abort - type: FLOAT - description: | - The proportion of users that had their connection terminated by the client - (for example, terminating open connections before shutting down). - -- mode: NULLABLE - name: proportion_unreachable - type: FLOAT - description: | - The proportion of users that failed to upload telemetry because the server was not reachable - (e.g. because the host was not reachable, proxy problems or OS waking up after a suspension). - -- mode: NULLABLE - name: proportion_terminated - type: FLOAT - description: | - The proportion of users that had their connection terminated internally by the networking code. - -- mode: NULLABLE - name: proportion_channel_open - type: FLOAT - description: | - The proportion of users for which the upload request was terminated immediately, - by the client,because of a Necko internal error. - -- mode: NULLABLE - name: avg_dns_success_time - type: FLOAT - description: | - The average time it takes for a successful DNS resolution, in milliseconds. - -- mode: NULLABLE - name: missing_dns_success - type: FLOAT - description: | - - Counts how many sessions did not report the `DNS_LOOKUP_TIME` histogram. - -- mode: NULLABLE - name: avg_dns_failure_time - type: FLOAT - description: | - The average time it takes for an unsuccessful DNS resolution, in milliseconds. - -- mode: NULLABLE - name: missing_dns_failure - type: FLOAT - description: | - Counts how many sessions did not report the `DNS_FAILED_LOOKUP_TIME` histogram. - -- mode: NULLABLE - name: count_dns_failure - type: FLOAT - description: | - The average count of unsuccessful DNS resolutions reported. - -- mode: NULLABLE - name: ssl_error_prop - type: FLOAT - description: | - The proportion of users that reported an error through the `SSL_CERT_VERIFICATION_ERRORS` histogram. - -- mode: NULLABLE - name: avg_tls_handshake_time - type: FLOAT - description: | - The average time after the TCP SYN to ready for HTTP, in milliseconds. diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/klar_android/use_counters/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/klar_android/use_counters/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/klar_android/use_counters/schema.yaml 2024-07-26 20:01:24.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/klar_android/use_counters/schema.yaml 2024-07-26 20:17:39.000000000 +0000 @@ -1,11 +1,11 @@ fields: - name: normalized_app_id - mode: NULLABLE type: STRING + mode: NULLABLE description: App ID of the channel data was received from - name: normalized_channel - mode: NULLABLE type: STRING + mode: NULLABLE description: Normalized channel name - name: additional_properties type: STRING diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/event_monitoring_live_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/event_monitoring_live_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/event_monitoring_live_v1/metadata.yaml 2024-07-26 20:01:57.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/event_monitoring_live_v1/metadata.yaml 2024-07-26 20:20:54.000000000 +0000 @@ -1,10 +1,19 @@ +friendly_name: Event Monitoring Live +description: |- + Materialized view of experimentation related events + coming from monitor_frontend. +owners: +- ascholtz@mozilla.com +- akomar@mozilla.com +labels: + materialized_view: true + owner1: ascholtz + owner2: akomar +bigquery: null workgroup_access: - role: roles/bigquery.dataViewer members: - workgroup:mozilla-confidential - - workgroup:dataops-managed/external-fides - -# Generated by bigquery_etl.dependency references: materialized_view.sql: - moz-fx-data-shared-prod.monitor_frontend_live.events_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/monitor_dashboard_user_journey_funnels_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/monitor_dashboard_user_journey_funnels_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/monitor_dashboard_user_journey_funnels_v1/metadata.yaml 2024-07-26 20:01:57.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/monitor_dashboard_user_journey_funnels_v1/metadata.yaml 2024-07-26 20:20:54.000000000 +0000 @@ -1,10 +1,26 @@ +friendly_name: Monitor Dashboard User Journey Funnels +description: |- + Please provide a description for the query +owners: +- ksiegler@mozilla.org +labels: + incremental: true + dag: bqetl_generated_funnels + owner1: ksiegler +scheduling: + dag_name: bqetl_generated_funnels +bigquery: + time_partitioning: + type: day + field: submission_date + require_partition_filter: false + expiration_days: null + range_partitioning: null + clustering: null workgroup_access: - role: roles/bigquery.dataViewer members: - workgroup:mozilla-confidential - - workgroup:dataops-managed/external-fides - -# Generated by bigquery_etl.dependency references: query.sql: - mozdata.monitor_frontend.events_unnested diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/org_mozilla_fenix/geckoview_version/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/org_mozilla_fenix/geckoview_version/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/org_mozilla_fenix/geckoview_version/schema.yaml 2024-07-26 20:00:49.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/org_mozilla_fenix/geckoview_version/schema.yaml 2024-07-26 20:10:57.000000000 +0000 @@ -1,7 +1,13 @@ fields: -- type: DATETIME - name: build_hour -- type: INTEGER - name: geckoview_major_version -- type: INTEGER - name: n_pings +- name: build_hour + type: DATETIME + mode: NULLABLE + description: null +- name: geckoview_major_version + type: INTEGER + mode: NULLABLE + description: null +- name: n_pings + type: INTEGER + mode: NULLABLE + description: null diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_derived/mobile_search_clients_daily_v2/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/search_derived/mobile_search_cli ```

⚠️ Only part of the diff is displayed.

Link to full diff