mozilla / bigquery-etl

Bigquery ETL
https://mozilla.github.io/bigquery-etl
Mozilla Public License 2.0
253 stars 100 forks source link

Remove dev_telemetry_derived dataset #5835

Closed edugfilho closed 3 months ago

edugfilho commented 3 months ago

All GLAM-related work will be done in my sandbox.

Checklist for reviewer:

For modifications to schemas in restricted namespaces (see CODEOWNERS):

┆Issue is synchronized with this Jira Task

dataops-ci-bot commented 3 months ago

Integration report for "Remove dev_telemetry_derived dataset"

sql.diff

Click to expand! ```diff Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod: dev_telemetry_derived diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml 2024-06-25 15:37:29.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml 2024-06-25 15:47:24.000000000 +0000 @@ -1,49 +1,49 @@ fields: -- mode: NULLABLE - name: submission_date +- name: submission_date type: DATE -- mode: NULLABLE - name: source + mode: NULLABLE +- name: source type: STRING -- mode: NULLABLE - name: event_type + mode: NULLABLE +- name: event_type type: STRING -- mode: NULLABLE - name: form_factor + mode: NULLABLE +- name: form_factor type: STRING -- mode: NULLABLE - name: country + mode: NULLABLE +- name: country type: STRING -- mode: NULLABLE - name: subdivision1 + mode: NULLABLE +- name: subdivision1 type: STRING -- mode: NULLABLE - name: advertiser + mode: NULLABLE +- name: advertiser type: STRING -- mode: NULLABLE - name: release_channel + mode: NULLABLE +- name: release_channel type: STRING -- mode: NULLABLE - name: position + mode: NULLABLE +- name: position type: INTEGER -- mode: NULLABLE - name: provider + mode: NULLABLE +- name: provider type: STRING -- mode: NULLABLE - name: match_type + mode: NULLABLE +- name: match_type type: STRING -- mode: NULLABLE - name: normalized_os + mode: NULLABLE +- name: normalized_os type: STRING -- mode: NULLABLE - name: suggest_data_sharing_enabled + mode: NULLABLE +- name: suggest_data_sharing_enabled type: BOOLEAN -- mode: NULLABLE - name: event_count + mode: NULLABLE +- name: event_count type: INTEGER -- mode: NULLABLE - name: user_count + mode: NULLABLE 
+- name: user_count type: INTEGER -- mode: NULLABLE - name: query_type + mode: NULLABLE +- name: query_type type: STRING + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml 2024-06-25 15:37:29.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml 2024-06-25 15:47:24.000000000 +0000 @@ -1,40 +1,40 @@ fields: -- mode: NULLABLE - name: submission_date +- name: submission_date type: DATE -- mode: NULLABLE - name: form_factor + mode: NULLABLE +- name: form_factor type: STRING -- mode: NULLABLE - name: country + mode: NULLABLE +- name: country type: STRING -- mode: NULLABLE - name: advertiser + mode: NULLABLE +- name: advertiser type: STRING -- mode: NULLABLE - name: normalized_os + mode: NULLABLE +- name: normalized_os type: STRING -- mode: NULLABLE - name: release_channel + mode: NULLABLE +- name: release_channel type: STRING -- mode: NULLABLE - name: position + mode: NULLABLE +- name: position type: INTEGER -- mode: NULLABLE - name: provider + mode: NULLABLE +- name: provider type: STRING -- mode: NULLABLE - name: match_type + mode: NULLABLE +- name: match_type type: STRING -- mode: NULLABLE - name: suggest_data_sharing_enabled + mode: NULLABLE +- name: suggest_data_sharing_enabled type: BOOLEAN -- mode: NULLABLE - name: impression_count + mode: NULLABLE +- name: impression_count type: INTEGER -- mode: NULLABLE - name: click_count + mode: NULLABLE +- name: click_count type: INTEGER -- mode: NULLABLE - name: query_type + mode: NULLABLE +- name: query_type type: STRING + mode: NULLABLE diff -bur --no-dereference --new-file 
/tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/dev_telemetry_derived/dataset_metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/dev_telemetry_derived/dataset_metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/dev_telemetry_derived/dataset_metadata.yaml 2024-06-25 15:37:30.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/dev_telemetry_derived/dataset_metadata.yaml 1970-01-01 00:00:00.000000000 +0000 @@ -1,12 +0,0 @@ -friendly_name: Dev Telemetry Derived -description: |- - Dev version of Telemetry Derived for testing Glam Dev ETL pipeline. - This dataset will be the destination of queries run during the tests and - will be removed after such tests, unless we find it useful to keep it. -dataset_base_acl: derived -user_facing: false -labels: {} -workgroup_access: -- role: roles/bigquery.dataViewer - members: - - workgroup:mozilla-confidential diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml 2024-06-25 15:37:29.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml 2024-06-25 15:48:15.000000000 +0000 @@ -26,6 +26,9 @@ - name: adjust_network type: STRING mode: NULLABLE +- name: install_source + type: STRING + mode: NULLABLE - name: retained_week_2 type: BOOLEAN mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml 
2024-06-25 15:37:29.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml 2024-06-25 15:48:14.000000000 +0000 @@ -48,6 +48,10 @@ description: 'The type of source of a client installation. ' +- name: install_source + type: STRING + mode: NULLABLE + description: null - name: new_profiles type: INTEGER mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/focus_android/use_counters/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/focus_android/use_counters/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/focus_android/use_counters/schema.yaml 2024-06-25 15:38:06.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/focus_android/use_counters/schema.yaml 2024-06-25 15:54:56.000000000 +0000 @@ -1,11 +1,11 @@ fields: - name: normalized_app_id - type: STRING mode: NULLABLE + type: STRING description: App ID of the channel data was received from - name: normalized_channel - type: STRING mode: NULLABLE + type: STRING description: Normalized channel name - name: additional_properties type: STRING diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml 2024-06-25 15:37:29.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml 2024-06-25 15:46:46.000000000 +0000 @@ -6,9 +6,10 @@ - name: site_url type: STRING mode: NULLABLE - description: |- - For domain properties, this will be `sc-domain:` followed by the domain name. 
- For URL-prefix properties, it will be the full URL of the property definition. + description: 'For domain properties, this will be `sc-domain:` followed by the domain + name. + + For URL-prefix properties, it will be the full URL of the property definition.' - name: site_domain_name type: STRING mode: NULLABLE @@ -16,103 +17,110 @@ - name: page_url type: STRING mode: NULLABLE - description: |- - The final page URL linked by a search result after any skip redirects. - This will be null for anonymized Discover impressions. + description: 'The final page URL linked by a search result after any skip redirects. + + This will be null for anonymized Discover impressions.' - name: page_domain_name type: STRING mode: NULLABLE - description: |- - Domain name of the page URL. - This will be null for anonymized Discover impressions. + description: 'Domain name of the page URL. + + This will be null for anonymized Discover impressions.' - name: page_path type: STRING mode: NULLABLE - description: |- - The path part of the page URL. - This will be null for anonymized Discover impressions. + description: 'The path part of the page URL. + + This will be null for anonymized Discover impressions.' - name: localized_site_code type: STRING mode: NULLABLE - description: |- - Localized site code such as `en-US` or `de` found in the first segment of the page URL path (if any). - This will be null for anonymized Discover impressions. + description: 'Localized site code such as `en-US` or `de` found in the first segment + of the page URL path (if any). + + This will be null for anonymized Discover impressions.' - name: localized_site type: STRING mode: NULLABLE - description: |- - Description of the localized site language and/or country based on `localized_site_code` (if any). - This will be null for anonymized Discover impressions. + description: 'Description of the localized site language and/or country based on + `localized_site_code` (if any). 
+ + This will be null for anonymized Discover impressions.' - name: localized_site_language_code type: STRING mode: NULLABLE - description: |- - Localized site language code in ISO-639-alpha-2 format found in the first segment of the page URL path (if any). - This will be null for anonymized Discover impressions. + description: 'Localized site language code in ISO-639-alpha-2 format found in the + first segment of the page URL path (if any). + + This will be null for anonymized Discover impressions.' - name: localized_site_language type: STRING mode: NULLABLE - description: |- - Localized site language based on `localized_site_language_code` (if any). - This will be null for anonymized Discover impressions. + description: 'Localized site language based on `localized_site_language_code` (if + any). + + This will be null for anonymized Discover impressions.' - name: query type: STRING mode: NULLABLE - description: |- - The search query. - This will be null for anonymized search impressions, and all Discover and Google News search impressions. + description: 'The search query. + + This will be null for anonymized search impressions, and all Discover and Google + News search impressions.' - name: query_type type: STRING mode: NULLABLE - description: |- - Type of search query: - * Anonymized: Query was redacted by Google to protect the users' privacy. - * Brand: Query contained one or more Mozilla brand keywords. - * Non-Brand: Query didn't contain any Mozilla brand keywords. - * Unknown: Query couldn't be classified. - This will be null for all Discover and Google News search impressions. + description: "Type of search query:\n * Anonymized: Query was redacted by Google\ + \ to protect the users' privacy.\n * Brand: Query contained one or more Mozilla\ + \ brand keywords.\n * Non-Brand: Query didn't contain any Mozilla brand keywords.\n\ + \ * Unknown: Query couldn't be classified.\nThis will be null for all Discover\ + \ and Google News search impressions." 
- name: is_anonymized type: BOOLEAN mode: NULLABLE - description: |- - Whether Google has anonymized the search impression to protect the users' privacy. + description: 'Whether Google has anonymized the search impression to protect the + users'' privacy. + The `query` field will be null for anonymized search impressions. - The `country_code`, `page_url`, and related fields will be null for anonymized Discover impressions. + + The `country_code`, `page_url`, and related fields will be null for anonymized + Discover impressions.' - name: has_good_page_experience type: BOOLEAN mode: NULLABLE - description: |- - Whether Google Search considers the page to be providing a good page experience. - This will be null when the source data wasn't exported directly to BigQuery by Google. + description: 'Whether Google Search considers the page to be providing a good page + experience. + + This will be null when the source data wasn''t exported directly to BigQuery by + Google.' - name: search_type type: STRING mode: NULLABLE - description: |- - Where the link was seen by the user: - * Web: In Google Search's default "All" tab. - * Image: In Google Search's "Images" tab. - * Video: In Google Search's "Videos" tab. - * News: In Google Search's "News" tab. - * Discover: In Google's Discover feed. - * Google News: On news.google.com or in the Google News app on Android and iOS. + description: "Where the link was seen by the user:\n * Web: In Google Search's\ + \ default \"All\" tab.\n * Image: In Google Search's \"Images\" tab.\n * Video:\ + \ In Google Search's \"Videos\" tab.\n * News: In Google Search's \"News\" tab.\n\ + \ * Discover: In Google's Discover feed.\n * Google News: On news.google.com\ + \ or in the Google News app on Android and iOS." - name: search_appearance type: STRING mode: NULLABLE - description: |- - How the search result appeared (e.g. normal result, translated result, video). 
- This will be null when the source data wasn't exported directly to BigQuery by Google. + description: 'How the search result appeared (e.g. normal result, translated result, + video). + + This will be null when the source data wasn''t exported directly to BigQuery by + Google.' - name: user_country_code type: STRING mode: NULLABLE - description: |- - Country from which the user was searching, in ISO-3166-1-alpha-3 format. - This will be null for anonymized Discover impressions. + description: 'Country from which the user was searching, in ISO-3166-1-alpha-3 format. + + This will be null for anonymized Discover impressions.' - name: user_country type: STRING mode: NULLABLE - description: |- - Country from which the user was searching. - This will be null for anonymized Discover impressions. + description: 'Country from which the user was searching. + + This will be null for anonymized Discover impressions.' - name: user_region type: STRING mode: NULLABLE @@ -124,13 +132,15 @@ - name: device_type type: STRING mode: NULLABLE - description: |- - The type of device on which the user was searching: Desktop, Mobile, or Tablet. - This will be null for Discover impressions. + description: 'The type of device on which the user was searching: Desktop, Mobile, + or Tablet. + + This will be null for Discover impressions.' - name: impressions type: INTEGER mode: NULLABLE - description: The number of times that search results with a link to the page were shown to a user. + description: The number of times that search results with a link to the page were + shown to a user. - name: clicks type: INTEGER mode: NULLABLE @@ -138,6 +148,7 @@ - name: average_position type: FLOAT mode: NULLABLE - description: |- - The average position of the page in the search results, where `1` is the topmost position. - This will be null for Discover and Google News search impressions. + description: 'The average position of the page in the search results, where `1` + is the topmost position. 
+ + This will be null for Discover and Google News search impressions.' diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/schema.yaml 2024-06-25 15:37:29.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/schema.yaml 2024-06-25 15:46:46.000000000 +0000 @@ -6,9 +6,10 @@ - name: site_url type: STRING mode: NULLABLE - description: |- - For domain properties, this will be `sc-domain:` followed by the domain name. - For URL-prefix properties, it will be the full URL of the property definition. + description: 'For domain properties, this will be `sc-domain:` followed by the domain + name. + + For URL-prefix properties, it will be the full URL of the property definition.' - name: site_domain_name type: STRING mode: NULLABLE @@ -20,27 +21,23 @@ - name: query_type type: STRING mode: NULLABLE - description: |- - Type of search query: - * Anonymized: Query was redacted by Google to protect the users' privacy. - * Brand: Query contained one or more Mozilla brand keywords. - * Non-Brand: Query didn't contain any Mozilla brand keywords. - * Unknown: Query couldn't be classified. + description: "Type of search query:\n * Anonymized: Query was redacted by Google\ + \ to protect the users' privacy.\n * Brand: Query contained one or more Mozilla\ + \ brand keywords.\n * Non-Brand: Query didn't contain any Mozilla brand keywords.\n\ + \ * Unknown: Query couldn't be classified." - name: is_anonymized type: BOOLEAN mode: NULLABLE - description: |- - Whether Google has anonymized the search impression to protect the users' privacy. 
- The `query` field will be null for anonymized search impressions. + description: 'Whether Google has anonymized the search impression to protect the + users'' privacy. + + The `query` field will be null for anonymized search impressions.' - name: search_type type: STRING mode: NULLABLE - description: |- - Where the link was seen by the user: - * Web: In Google Search's default "All" tab. - * Image: In Google Search's "Images" tab. - * Video: In Google Search's "Videos" tab. - * News: In Google Search's "News" tab. + description: "Where the link was seen by the user:\n * Web: In Google Search's\ + \ default \"All\" tab.\n * Image: In Google Search's \"Images\" tab.\n * Video:\ + \ In Google Search's \"Videos\" tab.\n * News: In Google Search's \"News\" tab." - name: user_country_code type: STRING mode: NULLABLE @@ -60,17 +57,20 @@ - name: device_type type: STRING mode: NULLABLE - description: |- - The type of device on which the user was searching: Desktop, Mobile, or Tablet. + description: 'The type of device on which the user was searching: Desktop, Mobile, + or Tablet.' - name: impressions type: INTEGER mode: NULLABLE - description: The number of times that search results with at least one link to the site were shown to a user. + description: The number of times that search results with at least one link to the + site were shown to a user. - name: clicks type: INTEGER mode: NULLABLE - description: The number of times a user clicked at least one search result link to the site. + description: The number of times a user clicked at least one search result link + to the site. - name: average_top_position type: FLOAT mode: NULLABLE - description: The average top position of the site in the search results, where `1` is the topmost position. + description: The average top position of the site in the search results, where `1` + is the topmost position. 
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/klar_android/use_counters/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/klar_android/use_counters/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/klar_android/use_counters/schema.yaml 2024-06-25 15:38:07.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/klar_android/use_counters/schema.yaml 2024-06-25 15:52:46.000000000 +0000 @@ -1,11 +1,11 @@ fields: - name: normalized_app_id - type: STRING mode: NULLABLE + type: STRING description: App ID of the channel data was received from - name: normalized_channel - type: STRING mode: NULLABLE + type: STRING description: Normalized channel name - name: additional_properties type: STRING diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/org_mozilla_fenix/geckoview_version/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/org_mozilla_fenix/geckoview_version/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/org_mozilla_fenix/geckoview_version/schema.yaml 2024-06-25 15:37:29.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/org_mozilla_fenix/geckoview_version/schema.yaml 2024-06-25 15:46:56.000000000 +0000 @@ -1,7 +1,13 @@ fields: -- type: DATETIME - name: build_hour -- type: INTEGER - name: geckoview_major_version -- type: INTEGER - name: n_pings +- name: build_hour + type: DATETIME + mode: NULLABLE + description: null +- name: geckoview_major_version + type: INTEGER + mode: NULLABLE + description: null +- name: n_pings + type: INTEGER + mode: NULLABLE + description: null ```

Link to full diff