mozilla / bigquery-etl

Bigquery ETL
https://mozilla.github.io/bigquery-etl
Mozilla Public License 2.0
253 stars 100 forks source link

Move limited historical Google Search Console data into separate views (DENG-4329) #6027

Closed sean-rose closed 2 months ago

sean-rose commented 2 months ago

DENG-4329: Google Search Console data synced by Fivetran is not comparable to the data exported by Google

Checklist for reviewer:

For modifications to schemas in restricted namespaces (see CODEOWNERS):

┆Issue is synchronized with this Jira Task

dataops-ci-bot commented 2 months ago

Integration report for "Add views for limited historical Google Search Console data."

sql.diff

Click to expand! ```diff diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_generated_funnels.py /tmp/workspace/generated-sql/dags/bqetl_generated_funnels.py --- /tmp/workspace/main-generated-sql/dags/bqetl_generated_funnels.py 2024-08-07 20:00:38.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_generated_funnels.py 2024-08-07 20:18:08.000000000 +0000 @@ -288,6 +288,21 @@ ) ) + monitor_frontend_derived__monitor_dashboard_user_journey_funnels__v1 = bigquery_etl_query( + task_id="monitor_frontend_derived__monitor_dashboard_user_journey_funnels__v1", + destination_table="monitor_dashboard_user_journey_funnels_v1", + dataset_id="monitor_frontend_derived", + project_id="moz-fx-data-shared-prod", + owner="ksiegler@mozilla.org", + email=[ + "ascholtz@mozilla.com", + "ksiegler@mozilla.org", + "telemetry-alerts@mozilla.com", + ], + date_partition_parameter="submission_date", + depends_on_past=False, + ) + accounts_frontend_derived__email_first_reg_login_funnels_by_service__v1.set_upstream( wait_for_copy_deduplicate_all ) @@ -349,3 +364,7 @@ firefox_accounts_derived__registration_funnels_legacy_events__v1.set_upstream( wait_for_firefox_accounts_derived__fxa_stdout_events__v1 ) + + monitor_frontend_derived__monitor_dashboard_user_journey_funnels__v1.set_upstream( + wait_for_copy_deduplicate_all + ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_google_analytics_derived_ga4.py /tmp/workspace/generated-sql/dags/bqetl_google_analytics_derived_ga4.py --- /tmp/workspace/main-generated-sql/dags/bqetl_google_analytics_derived_ga4.py 2024-08-07 20:00:39.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_google_analytics_derived_ga4.py 2024-08-07 20:18:09.000000000 +0000 @@ -382,21 +382,6 @@ depends_on_past=False, ) - mozilla_org_derived__blogs_sessions__v2 = bigquery_etl_query( - task_id="mozilla_org_derived__blogs_sessions__v2", - destination_table="blogs_sessions_v2", - dataset_id="mozilla_org_derived", - project_id="moz-fx-data-shared-prod", - owner="mhirose@mozilla.com", - email=[ - "kwindau@mozilla.com", - "mhirose@mozilla.com", - "telemetry-alerts@mozilla.com", - ], - date_partition_parameter="submission_date", - depends_on_past=False, - ) - mozilla_org_derived__ga_clients__v2 = bigquery_etl_query( task_id="mozilla_org_derived__ga_clients__v2", destination_table="ga_clients_v2", @@ -554,8 +539,6 @@ mozilla_org_derived__blogs_goals__v2.set_upstream(wait_for_blogs_events_table) - mozilla_org_derived__blogs_sessions__v2.set_upstream(wait_for_blogs_events_table) - mozilla_org_derived__ga_clients__v2.set_upstream( mozilla_org_derived__ga_sessions__v2 ) Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console: limited_historical_search_impressions_by_page Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console: limited_historical_search_impressions_by_site Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived: blogs_sessions_v2 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-marketing-prod/ga_derived/blogs_sessions_v2/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-marketing-prod/ga_derived/blogs_sessions_v2/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-marketing-prod/ga_derived/blogs_sessions_v2/metadata.yaml 2024-08-07 19:56:19.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-marketing-prod/ga_derived/blogs_sessions_v2/metadata.yaml 2024-08-07 20:12:46.000000000 +0000 @@ -9,9 +9,13 @@ dag: bqetl_google_analytics_derived_ga4 scheduling: dag_name: bqetl_google_analytics_derived_ga4 - depends_on: + depends_on_tables_existing: - task_id: wait_for_blogs_events_table - dag_name: bqetl_google_analytics_derived_ga4 + table_id: moz-fx-data-marketing-prod.analytics_314399816.events_{{ ds_nodash }} + poke_interval: 30m + timeout: 10h + retries: 1 + retry_delay: 30m bigquery: time_partitioning: type: day diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-marketing-prod/google_search_console/search_impressions_by_page/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-marketing-prod/google_search_console/search_impressions_by_page/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-marketing-prod/google_search_console/search_impressions_by_page/schema.yaml 2024-08-07 19:54:32.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-marketing-prod/google_search_console/search_impressions_by_page/schema.yaml 2024-08-07 19:55:18.000000000 +0000 @@ -81,9 +81,7 @@ - name: has_good_page_experience type: BOOLEAN mode: NULLABLE - description: |- - Whether Google Search considers the page to be providing a good page experience. - This will be null when the source data wasn't exported directly to BigQuery by Google. + description: Whether Google Search considers the page to be providing a good page experience. - name: search_type type: STRING mode: NULLABLE @@ -98,9 +96,7 @@ - name: search_appearance type: STRING mode: NULLABLE - description: |- - How the search result appeared (e.g. normal result, translated result, video). - This will be null when the source data wasn't exported directly to BigQuery by Google. + description: How the search result appeared (e.g. normal result, translated result, video). - name: user_country_code type: STRING mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml 2024-08-07 19:54:31.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml 2024-08-07 20:03:08.000000000 +0000 @@ -1,49 +1,49 @@ fields: -- mode: NULLABLE - name: submission_date +- name: submission_date type: DATE -- mode: NULLABLE - name: source + mode: NULLABLE +- name: source type: STRING -- mode: NULLABLE - name: event_type + mode: NULLABLE +- name: event_type type: STRING -- mode: NULLABLE - name: form_factor + mode: NULLABLE +- name: form_factor type: STRING -- mode: NULLABLE - name: country + mode: NULLABLE +- name: country type: STRING -- mode: NULLABLE - name: subdivision1 + mode: NULLABLE +- name: subdivision1 type: STRING -- mode: NULLABLE - name: advertiser + mode: NULLABLE +- name: advertiser type: STRING -- mode: NULLABLE - name: release_channel + mode: NULLABLE +- name: release_channel type: STRING -- mode: NULLABLE - name: position + mode: NULLABLE +- name: position type: INTEGER -- mode: NULLABLE - name: provider + mode: NULLABLE +- name: provider type: STRING -- mode: NULLABLE - name: match_type + mode: NULLABLE +- name: match_type type: STRING -- mode: NULLABLE - name: normalized_os + mode: NULLABLE +- name: normalized_os type: STRING -- mode: NULLABLE - name: suggest_data_sharing_enabled + mode: NULLABLE +- name: suggest_data_sharing_enabled type: BOOLEAN -- mode: NULLABLE - name: event_count + mode: NULLABLE +- name: event_count type: INTEGER -- mode: NULLABLE - name: user_count + mode: NULLABLE +- name: user_count type: INTEGER -- mode: NULLABLE - name: query_type + mode: NULLABLE +- name: query_type type: STRING + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml 2024-08-07 19:54:31.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml 2024-08-07 20:03:09.000000000 +0000 @@ -1,40 +1,40 @@ fields: -- mode: NULLABLE - name: submission_date +- name: submission_date type: DATE -- mode: NULLABLE - name: form_factor + mode: NULLABLE +- name: form_factor type: STRING -- mode: NULLABLE - name: country + mode: NULLABLE +- name: country type: STRING -- mode: NULLABLE - name: advertiser + mode: NULLABLE +- name: advertiser type: STRING -- mode: NULLABLE - name: normalized_os + mode: NULLABLE +- name: normalized_os type: STRING -- mode: NULLABLE - name: release_channel + mode: NULLABLE +- name: release_channel type: STRING -- mode: NULLABLE - name: position + mode: NULLABLE +- name: position type: INTEGER -- mode: NULLABLE - name: provider + mode: NULLABLE +- name: provider type: STRING -- mode: NULLABLE - name: match_type + mode: NULLABLE +- name: match_type type: STRING -- mode: NULLABLE - name: suggest_data_sharing_enabled + mode: NULLABLE +- name: suggest_data_sharing_enabled type: BOOLEAN -- mode: NULLABLE - name: impression_count + mode: NULLABLE +- name: impression_count type: INTEGER -- mode: NULLABLE - name: click_count + mode: NULLABLE +- name: click_count type: INTEGER -- mode: NULLABLE - name: query_type + mode: NULLABLE +- name: query_type type: STRING + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml 2024-08-07 19:54:31.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml 2024-08-07 20:04:13.000000000 +0000 @@ -26,6 +26,9 @@ - name: adjust_network type: STRING mode: NULLABLE +- name: install_source + type: STRING + mode: NULLABLE - name: retained_week_2 type: BOOLEAN mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml 2024-08-07 19:54:31.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml 2024-08-07 20:04:13.000000000 +0000 @@ -48,6 +48,10 @@ description: 'The type of source of a client installation. ' +- name: install_source + type: STRING + mode: NULLABLE + description: null - name: new_profiles type: INTEGER mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/use_counters/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/use_counters/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/use_counters/schema.yaml 2024-08-07 19:55:10.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/use_counters/schema.yaml 2024-08-07 20:06:15.000000000 +0000 @@ -1,11 +1,11 @@ fields: - name: normalized_app_id - type: STRING mode: NULLABLE + type: STRING description: App ID of the channel data was received from - name: normalized_channel - type: STRING mode: NULLABLE + type: STRING description: Normalized channel name - name: additional_properties type: STRING diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_page/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_page/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_page/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_page/metadata.yaml 2024-08-07 20:12:47.000000000 +0000 @@ -0,0 +1,28 @@ +friendly_name: Limited Historical Search Impressions By Page +description: |- + Limited historical Google Search impressions aggregated by page for the following domains: + * addons.mozilla.org (between 2021-03-29 and 2023-08-08) + * blog.mozilla.org (between 2021-03-29 and 2023-08-08) + * developer.mozilla.org (between 2022-12-18 and 2024-04-29) + * getpocket.com (between 2021-03-29 and 2023-08-08) + * support.mozilla.org (between 2021-03-29 and 2023-08-08) + * www.mozilla.org (between 2021-03-29 and 2023-08-08) + + The data has the following limitations: + * For each site only data for specific historical dates is included as noted above. + * For each site only the top ~50,000 rows of data per day per search type is included (prioritized by number of clicks). + * Anonymized search queries aren't included. +owners: +- srose@mozilla.com +labels: + owner1: srose +bigquery: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: + view.sql: + - moz-fx-data-shared-prod.google_search_console_derived.search_impressions_by_page_v1 + - moz-fx-data-shared-prod.static.country_codes_v1 + - moz-fx-data-shared-prod.static.language_codes_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_page/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_page/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_page/schema.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_page/schema.yaml 2024-08-07 20:03:23.000000000 +0000 @@ -0,0 +1,99 @@ +fields: +- name: date + type: DATE + mode: NULLABLE + description: The day on which the search occurred (Pacific Time). +- name: site_url + type: STRING + mode: NULLABLE + description: 'For domain properties, this will be `sc-domain:` followed by the domain + name. + + For URL-prefix properties, it will be the full URL of the property definition.' +- name: site_domain_name + type: STRING + mode: NULLABLE + description: Domain name of the site. +- name: page_url + type: STRING + mode: NULLABLE + description: The final page URL linked by a search result after any skip redirects. +- name: page_domain_name + type: STRING + mode: NULLABLE + description: Domain name of the page URL. +- name: page_path + type: STRING + mode: NULLABLE + description: The path part of the page URL. +- name: localized_site_code + type: STRING + mode: NULLABLE + description: Localized site code such as `en-US` or `de` found in the first segment + of the page URL path (if any). +- name: localized_site + type: STRING + mode: NULLABLE + description: Description of the localized site language and/or country based on + `localized_site_code` (if any). +- name: localized_site_language_code + type: STRING + mode: NULLABLE + description: Localized site language code in ISO-639-alpha-2 format found in the + first segment of the page URL path (if any). +- name: localized_site_language + type: STRING + mode: NULLABLE + description: Localized site language based on `localized_site_language_code` (if + any). +- name: query + type: STRING + mode: NULLABLE + description: The search query. +- name: query_type + type: STRING + mode: NULLABLE + description: "Type of search query:\n * Brand: Query contained one or more Mozilla\ + \ brand keywords.\n * Non-Brand: Query didn't contain any Mozilla brand keywords.\n\ + \ * Unknown: Query couldn't be classified." +- name: search_type + type: STRING + mode: NULLABLE + description: "Where the link was seen by the user:\n * Web: In Google Search's\ + \ default \"All\" tab.\n * Image: In Google Search's \"Images\" tab.\n * Video:\ + \ In Google Search's \"Videos\" tab.\n * News: In Google Search's \"News\" tab." +- name: user_country_code + type: STRING + mode: NULLABLE + description: Country from which the user was searching, in ISO-3166-1-alpha-3 format. +- name: user_country + type: STRING + mode: NULLABLE + description: Country from which the user was searching. +- name: user_region + type: STRING + mode: NULLABLE + description: Region from which the user was searching. +- name: user_subregion + type: STRING + mode: NULLABLE + description: Sub-region from which the user was searching. +- name: device_type + type: STRING + mode: NULLABLE + description: 'The type of device on which the user was searching: Desktop, Mobile, + or Tablet.' +- name: impressions + type: INTEGER + mode: NULLABLE + description: The number of times that search results with a link to the page were + shown to a user. +- name: clicks + type: INTEGER + mode: NULLABLE + description: The number of times a user clicked a search result link to the page. +- name: average_position + type: FLOAT + mode: NULLABLE + description: The average position of the page in the search results, where `1` is + the topmost position. diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_page/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_page/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_page/view.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_page/view.sql 2024-08-07 19:55:18.000000000 +0000 @@ -0,0 +1,66 @@ +CREATE OR REPLACE VIEW + `moz-fx-data-shared-prod.google_search_console.limited_historical_search_impressions_by_page` +AS +SELECT + search_impressions.date, + search_impressions.site_url, + search_impressions.site_domain_name, + search_impressions.page_url, + search_impressions.page_domain_name, + search_impressions.page_path, + search_impressions.localized_site_code, + CONCAT( + COALESCE(localized_site_language.name, search_impressions.localized_site_language_code), + COALESCE( + CONCAT( + ' - ', + COALESCE(localized_site_country.name, search_impressions.localized_site_country_code) + ), + '' + ) + ) AS localized_site, + search_impressions.localized_site_language_code, + COALESCE( + localized_site_language.name, + search_impressions.localized_site_language_code + ) AS localized_site_language, + search_impressions.query, + mozfun.google_search_console.classify_site_query( + search_impressions.site_domain_name, + search_impressions.query, + search_impressions.search_type + ) AS query_type, + search_impressions.search_type, + search_impressions.user_country_code, + COALESCE(user_country.name, search_impressions.user_country_code) AS user_country, + user_country.region_name AS user_region, + user_country.subregion_name AS user_subregion, + search_impressions.device_type, + search_impressions.impressions, + search_impressions.clicks, + search_impressions.average_position +FROM + `moz-fx-data-shared-prod.google_search_console_derived.search_impressions_by_page_v1` AS search_impressions +LEFT JOIN + `moz-fx-data-shared-prod.static.language_codes_v1` AS localized_site_language + ON search_impressions.localized_site_language_code = localized_site_language.code_2 +LEFT JOIN + `moz-fx-data-shared-prod.static.country_codes_v1` AS localized_site_country + ON search_impressions.localized_site_country_code = localized_site_country.code +LEFT JOIN + `moz-fx-data-shared-prod.static.country_codes_v1` AS user_country + ON search_impressions.user_country_code = user_country.code_3 +WHERE + -- Exclude the last two days of data for each site because the data for those days is incomplete. + CASE + WHEN site_domain_name IN ( + 'addons.mozilla.org', + 'blog.mozilla.org', + 'getpocket.com', + 'support.mozilla.org', + 'www.mozilla.org' + ) + THEN `date` <= '2023-08-08' + WHEN site_domain_name = 'developer.mozilla.org' + THEN `date` <= '2024-04-29' + END diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_site/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_site/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_site/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_site/metadata.yaml 2024-08-07 20:12:47.000000000 +0000 @@ -0,0 +1,27 @@ +friendly_name: Limited Historical Search Impressions By Site +description: |- + Limited historical Google Search impressions aggregated by site for the following domains: + * addons.mozilla.org (between 2021-03-29 and 2023-08-08) + * blog.mozilla.org (between 2021-03-29 and 2023-08-08) + * developer.mozilla.org (between 2022-12-18 and 2024-04-29) + * getpocket.com (between 2021-03-29 and 2023-08-08) + * support.mozilla.org (between 2021-03-29 and 2023-08-08) + * www.mozilla.org (between 2021-03-29 and 2023-08-08) + + The data has the following limitations: + * For each site only data for specific historical dates is included as noted above. + * For each site only the top ~50,000 rows of data per day per search type is included (prioritized by number of clicks). + * Anonymized search queries aren't included. +owners: +- srose@mozilla.com +labels: + owner1: srose +bigquery: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: + view.sql: + - moz-fx-data-shared-prod.google_search_console_derived.search_impressions_by_site_v1 + - moz-fx-data-shared-prod.static.country_codes_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_site/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_site/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_site/schema.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_site/schema.yaml 2024-08-07 20:03:23.000000000 +0000 @@ -0,0 +1,68 @@ +fields: +- name: date + type: DATE + mode: NULLABLE + description: The day on which the search occurred (Pacific Time). +- name: site_url + type: STRING + mode: NULLABLE + description: 'For domain properties, this will be `sc-domain:` followed by the domain + name. + + For URL-prefix properties, it will be the full URL of the property definition.' +- name: site_domain_name + type: STRING + mode: NULLABLE + description: Domain name of the site. +- name: query + type: STRING + mode: NULLABLE + description: The search query. +- name: query_type + type: STRING + mode: NULLABLE + description: "Type of search query:\n * Brand: Query contained one or more Mozilla\ + \ brand keywords.\n * Non-Brand: Query didn't contain any Mozilla brand keywords.\n\ + \ * Unknown: Query couldn't be classified." +- name: search_type + type: STRING + mode: NULLABLE + description: "Where the link was seen by the user:\n * Web: In Google Search's\ + \ default \"All\" tab.\n * Image: In Google Search's \"Images\" tab.\n * Video:\ + \ In Google Search's \"Videos\" tab.\n * News: In Google Search's \"News\" tab." +- name: user_country_code + type: STRING + mode: NULLABLE + description: Country from which the user was searching, in ISO-3166-1-alpha-3 format. +- name: user_country + type: STRING + mode: NULLABLE + description: Country from which the user was searching. +- name: user_region + type: STRING + mode: NULLABLE + description: Region from which the user was searching. +- name: user_subregion + type: STRING + mode: NULLABLE + description: Sub-region from which the user was searching. +- name: device_type + type: STRING + mode: NULLABLE + description: 'The type of device on which the user was searching: Desktop, Mobile, + or Tablet.' +- name: impressions + type: INTEGER + mode: NULLABLE + description: The number of times that search results with at least one link to the + site were shown to a user. +- name: clicks + type: INTEGER + mode: NULLABLE + description: The number of times a user clicked at least one search result link + to the site. +- name: average_top_position + type: FLOAT + mode: NULLABLE + description: The average top position of the site in the search results, where `1` + is the topmost position. diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_site/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_site/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_site/view.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_site/view.sql 2024-08-07 19:55:18.000000000 +0000 @@ -0,0 +1,41 @@ +CREATE OR REPLACE VIEW + `moz-fx-data-shared-prod.google_search_console.limited_historical_search_impressions_by_site` +AS +SELECT + search_impressions.`date`, + search_impressions.site_url, + search_impressions.site_domain_name, + search_impressions.query, + mozfun.google_search_console.classify_site_query( + search_impressions.site_domain_name, + search_impressions.query, + search_impressions.search_type + ) AS query_type, + search_impressions.search_type, + search_impressions.user_country_code, + COALESCE(user_country.name, search_impressions.user_country_code) AS user_country, + user_country.region_name AS user_region, + user_country.subregion_name AS user_subregion, + search_impressions.device_type, + search_impressions.impressions, + search_impressions.clicks, + search_impressions.average_top_position +FROM + `moz-fx-data-shared-prod.google_search_console_derived.search_impressions_by_site_v1` AS search_impressions +LEFT JOIN + `moz-fx-data-shared-prod.static.country_codes_v1` AS user_country + ON search_impressions.user_country_code = user_country.code_3 +WHERE + -- Exclude the last two days of data for each site because the data for those days is incomplete. + CASE + WHEN site_domain_name IN ( + 'addons.mozilla.org', + 'blog.mozilla.org', + 'getpocket.com', + 'support.mozilla.org', + 'www.mozilla.org' + ) + THEN `date` <= '2023-08-08' + WHEN site_domain_name = 'developer.mozilla.org' + THEN `date` <= '2024-04-29' + END diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/metadata.yaml 2024-08-07 19:56:27.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/metadata.yaml 2024-08-07 20:12:47.000000000 +0000 @@ -1,22 +1,12 @@ friendly_name: Search Impressions By Page description: |- Google Search impressions aggregated by page for the following domains: - * addons.mozilla.org - * blog.mozilla.org - * developer.mozilla.org - * getpocket.com - * support.mozilla.org - * www.mozilla.org - - For the developer.mozilla.org domain: - * Records from 2024-04-10 onward are from source data exported directly to BigQuery by Google. - * Records before 2024-04-10 are from source data synced to BigQuery by Fivetran. - - For the other domains: - * Records from 2023-08-01 onward are from source data exported directly to BigQuery by Google. - * Records before 2023-08-01 are from source data synced to BigQuery by Fivetran. - - Anonymized search queries, and Discover and Google News search impressions are only included if the source data was exported directly to BigQuery by Google. + * addons.mozilla.org (since 2023-07-11) + * blog.mozilla.org (since 2023-07-11) + * developer.mozilla.org (since 2024-04-10) + * getpocket.com (since 2023-07-24) + * support.mozilla.org (since 2023-07-11) + * www.mozilla.org (since 2023-07-11) owners: - srose@mozilla.com labels: @@ -28,7 +18,6 @@ - workgroup:mozilla-confidential references: view.sql: - - moz-fx-data-shared-prod.google_search_console_derived.search_impressions_by_page_v1 - moz-fx-data-shared-prod.google_search_console_derived.search_impressions_by_page_v2 - moz-fx-data-shared-prod.static.country_codes_v1 - moz-fx-data-shared-prod.static.language_codes_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml 2024-08-07 19:54:32.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml 2024-08-07 20:03:23.000000000 +0000 @@ -6,9 +6,10 @@ - name: site_url type: STRING mode: NULLABLE - description: |- - For domain properties, this will be `sc-domain:` followed by the domain name. - For URL-prefix properties, it will be the full URL of the property definition. + description: 'For domain properties, this will be `sc-domain:` followed by the domain + name. + + For URL-prefix properties, it will be the full URL of the property definition.' - name: site_domain_name type: STRING mode: NULLABLE @@ -16,103 +17,104 @@ - name: page_url type: STRING mode: NULLABLE - description: |- - The final page URL linked by a search result after any skip redirects. - This will be null for anonymized Discover impressions. + description: 'The final page URL linked by a search result after any skip redirects. + + This will be null for anonymized Discover impressions.' - name: page_domain_name type: STRING mode: NULLABLE - description: |- - Domain name of the page URL. - This will be null for anonymized Discover impressions. + description: 'Domain name of the page URL. + + This will be null for anonymized Discover impressions.' - name: page_path type: STRING mode: NULLABLE - description: |- - The path part of the page URL. - This will be null for anonymized Discover impressions. + description: 'The path part of the page URL. + + This will be null for anonymized Discover impressions.' - name: localized_site_code type: STRING mode: NULLABLE - description: |- - Localized site code such as `en-US` or `de` found in the first segment of the page URL path (if any). - This will be null for anonymized Discover impressions. + description: 'Localized site code such as `en-US` or `de` found in the first segment + of the page URL path (if any). + + This will be null for anonymized Discover impressions.' - name: localized_site type: STRING mode: NULLABLE - description: |- - Description of the localized site language and/or country based on `localized_site_code` (if any). - This will be null for anonymized Discover impressions. + description: 'Description of the localized site language and/or country based on + `localized_site_code` (if any). + + This will be null for anonymized Discover impressions.' - name: localized_site_language_code type: STRING mode: NULLABLE - description: |- - Localized site language code in ISO-639-alpha-2 format found in the first segment of the page URL path (if any). - This will be null for anonymized Discover impressions. + description: 'Localized site language code in ISO-639-alpha-2 format found in the + first segment of the page URL path (if any). + + This will be null for anonymized Discover impressions.' - name: localized_site_language type: STRING mode: NULLABLE - description: |- - Localized site language based on `localized_site_language_code` (if any). - This will be null for anonymized Discover impressions. + description: 'Localized site language based on `localized_site_language_code` (if + any). + + This will be null for anonymized Discover impressions.' - name: query type: STRING mode: NULLABLE - description: |- - The search query. - This will be null for anonymized search impressions, and all Discover and Google News search impressions. + description: 'The search query. + + This will be null for anonymized search impressions, and all Discover and Google + News search impressions.' - name: query_type type: STRING mode: NULLABLE - description: |- - Type of search query: - * Anonymized: Query was redacted by Google to protect the users' privacy. - * Brand: Query contained one or more Mozilla brand keywords. - * Non-Brand: Query didn't contain any Mozilla brand keywords. - * Unknown: Query couldn't be classified. - This will be null for all Discover and Google News search impressions. + description: "Type of search query:\n * Anonymized: Query was redacted by Google\ + \ to protect the users' privacy.\n * Brand: Query contained one or more Mozilla\ + \ brand keywords.\n * Non-Brand: Query didn't contain any Mozilla brand keywords.\n\ + \ * Unknown: Query couldn't be classified.\nThis will be null for all Discover\ + \ and Google News search impressions." - name: is_anonymized type: BOOLEAN mode: NULLABLE - description: |- - Whether Google has anonymized the search impression to protect the users' privacy. + description: 'Whether Google has anonymized the search impression to protect the + users'' privacy. + The `query` field will be null for anonymized search impressions. - The `country_code`, `page_url`, and related fields will be null for anonymized Discover impressions. + + The `country_code`, `page_url`, and related fields will be null for anonymized + Discover impressions.' - name: has_good_page_experience type: BOOLEAN mode: NULLABLE - description: |- - Whether Google Search considers the page to be providing a good page experience. - This will be null when the source data wasn't exported directly to BigQuery by Google. + description: Whether Google Search considers the page to be providing a good page + experience. - name: search_type type: STRING mode: NULLABLE - description: |- - Where the link was seen by the user: - * Web: In Google Search's default "All" tab. - * Image: In Google Search's "Images" tab. - * Video: In Google Search's "Videos" tab. - * News: In Google Search's "News" tab. - * Discover: In Google's Discover feed. - * Google News: On news.google.com or in the Google News app on Android and iOS. + description: "Where the link was seen by the user:\n * Web: In Google Search's\ + \ default \"All\" tab.\n * Image: In Google Search's \"Images\" tab.\n * Video:\ + \ In Google Search's \"Videos\" tab.\n * News: In Google Search's \"News\" tab.\n\ + \ * Discover: In Google's Discover feed.\n * Google News: On news.google.com\ + \ or in the Google News app on Android and iOS." - name: search_appearance type: STRING mode: NULLABLE - description: |- - How the search result appeared (e.g. normal result, translated result, video). - This will be null when the source data wasn't exported directly to BigQuery by Google. + description: How the search result appeared (e.g. normal result, translated result, + video). - name: user_country_code type: STRING mode: NULLABLE - description: |- - Country from which the user was searching, in ISO-3166-1-alpha-3 format. - This will be null for anonymized Discover impressions. + description: 'Country from which the user was searching, in ISO-3166-1-alpha-3 format. + + This will be null for anonymized Discover impressions.' - name: user_country type: STRING mode: NULLABLE - description: |- - Country from which the user was searching. - This will be null for anonymized Discover impressions. + description: 'Country from which the user was searching. + + This will be null for anonymized Discover impressions.' - name: user_region type: STRING mode: NULLABLE @@ -124,13 +126,15 @@ - name: device_type type: STRING mode: NULLABLE - description: |- - The type of device on which the user was searching: Desktop, Mobile, or Tablet. - This will be null for Discover impressions. + description: 'The type of device on which the user was searching: Desktop, Mobile, + or Tablet. + + This will be null for Discover impressions.' - name: impressions type: INTEGER mode: NULLABLE - description: The number of times that search results with a link to the page were shown to a user. + description: The number of times that search results with a link to the page were + shown to a user. - name: clicks type: INTEGER mode: NULLABLE @@ -138,6 +142,7 @@ - name: average_position type: FLOAT mode: NULLABLE - description: |- - The average position of the page in the search results, where `1` is the topmost position. - This will be null for Discover and Google News search impressions. + description: 'The average position of the page in the search results, where `1` + is the topmost position. + + This will be null for Discover and Google News search impressions.' diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/view.sql 2024-08-07 19:54:32.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/view.sql 2024-08-07 19:55:18.000000000 +0000 @@ -1,79 +1,6 @@ CREATE OR REPLACE VIEW `moz-fx-data-shared-prod.google_search_console.search_impressions_by_page` AS -WITH search_impressions_union AS ( - SELECT - `date`, - site_url, - site_domain_name, - page_url, - page_domain_name, - page_path, - localized_site_code, - localized_site_language_code, - localized_site_country_code, - query, - FALSE AS is_anonymized, - CAST(NULL AS BOOLEAN) AS has_good_page_experience, - search_type, - CAST(NULL AS STRING) AS search_appearance, - user_country_code, - device_type, - impressions, - clicks, - average_position - FROM - `moz-fx-data-shared-prod.google_search_console_derived.search_impressions_by_page_v1` - WHERE - CASE - WHEN site_domain_name IN ( - 'addons.mozilla.org', - 'blog.mozilla.org', - 'getpocket.com', - 'support.mozilla.org', - 'www.mozilla.org' - ) - THEN `date` < '2023-08-01' - WHEN site_domain_name = 'developer.mozilla.org' - THEN `date` < '2024-04-10' - ELSE FALSE - END - UNION ALL - SELECT - `date`, - site_url, - site_domain_name, - page_url, - page_domain_name, - page_path, - localized_site_code, - localized_site_language_code, - localized_site_country_code, - query, - is_anonymized, - has_good_page_experience, - search_type, - search_appearance, - user_country_code, - device_type, - impressions, - clicks, - average_position - FROM - `moz-fx-data-shared-prod.google_search_console_derived.search_impressions_by_page_v2` - WHERE - CASE - WHEN site_domain_name IN ( - 'addons.mozilla.org', - 'blog.mozilla.org', - 'getpocket.com', - 'support.mozilla.org', - 'www.mozilla.org' - ) - THEN `date` >= '2023-08-01' - ELSE TRUE - END -) SELECT search_impressions.date, search_impressions.site_url, @@ -116,7 +43,7 @@ search_impressions.clicks, search_impressions.average_position FROM - search_impressions_union AS search_impressions + `moz-fx-data-shared-prod.google_search_console_derived.search_impressions_by_page_v2` AS search_impressions LEFT JOIN `moz-fx-data-shared-prod.static.language_codes_v1` AS localized_site_language ON search_impressions.localized_site_language_code = localized_site_language.code_2 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/metadata.yaml 2024-08-07 19:56:27.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/metadata.yaml 2024-08-07 20:12:47.000000000 +0000 @@ -1,22 +1,12 @@ friendly_name: Search Impressions By Site description: |- Google Search impressions aggregated by site for the following domains: - * addons.mozilla.org - * blog.mozilla.org - * developer.mozilla.org - * getpocket.com - * support.mozilla.org - * www.mozilla.org - - For the developer.mozilla.org domain: - * Records from 2024-04-10 onward are from source data exported directly to BigQuery by Google. - * Records before 2024-04-10 are from source data synced to BigQuery by Fivetran. - - For the other domains: - * Records from 2023-08-01 onward are from source data exported directly to BigQuery by Google. - * Records before 2023-08-01 are from source data synced to BigQuery by Fivetran. - - Anonymized search queries are only included if the source data was exported directly to BigQuery by Google. + * addons.mozilla.org (since 2023-07-11) + * blog.mozilla.org (since 2023-07-11) + * developer.mozilla.org (since 2024-04-10) + * getpocket.com (since 2023-07-24) + * support.mozilla.org (since 2023-07-11) + * www.mozilla.org (since 2023-07-11) owners: - srose@mozilla.com labels: @@ -28,6 +18,5 @@ - workgroup:mozilla-confidential references: view.sql: - - moz-fx-data-shared-prod.google_search_console_derived.search_impressions_by_site_v1 - moz-fx-data-shared-prod.google_search_console_derived.search_impressions_by_site_v2 - moz-fx-data-shared-prod.static.country_codes_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/schema.yaml 2024-08-07 19:54:31.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/schema.yaml 2024-08-07 20:03:23.000000000 +0000 @@ -6,9 +6,10 @@ - name: site_url type: STRING mode: NULLABLE - description: |- - For domain properties, this will be `sc-domain:` followed by the domain name. - For URL-prefix properties, it will be the full URL of the property definition. + description: 'For domain properties, this will be `sc-domain:` followed by the domain + name. + + For URL-prefix properties, it will be the full URL of the property definition.' - name: site_domain_name type: STRING mode: NULLABLE @@ -20,27 +21,23 @@ - name: query_type type: STRING mode: NULLABLE - description: |- - Type of search query: - * Anonymized: Query was redacted by Google to protect the users' privacy. - * Brand: Query contained one or more Mozilla brand keywords. - * Non-Brand: Query didn't contain any Mozilla brand keywords. - * Unknown: Query couldn't be classified. + description: "Type of search query:\n * Anonymized: Query was redacted by Google\ + \ to protect the users' privacy.\n * Brand: Query contained one or more Mozilla\ + \ brand keywords.\n * Non-Brand: Query didn't contain any Mozilla brand keywords.\n\ + \ * Unknown: Query couldn't be classified." - name: is_anonymized type: BOOLEAN mode: NULLABLE - description: |- - Whether Google has anonymized the search impression to protect the users' privacy. - The `query` field will be null for anonymized search impressions. + description: 'Whether Google has anonymized the search impression to protect the + users'' privacy. + + The `query` field will be null for anonymized search impressions.' - name: search_type type: STRING mode: NULLABLE - description: |- - Where the link was seen by the user: - * Web: In Google Search's default "All" tab. - * Image: In Google Search's "Images" tab. - * Video: In Google Search's "Videos" tab. - * News: In Google Search's "News" tab. + description: "Where the link was seen by the user:\n * Web: In Google Search's\ + \ default \"All\" tab.\n * Image: In Google Search's \"Images\" tab.\n * Video:\ + \ In Google Search's \"Videos\" tab.\n * News: In Google Search's \"News\" tab." - name: user_country_code type: STRING mode: NULLABLE @@ -60,17 +57,20 @@ - name: device_type type: STRING mode: NULLABLE - description: |- - The type of device on which the user was searching: Desktop, Mobile, or Tablet. + description: 'The type of device on which the user was searching: Desktop, Mobile, + or Tablet.' - name: impressions type: INTEGER mode: NULLABLE - description: The number of times that search results with at least one link to the site were shown to a user. + description: The number of times that search results with at least one link to the + site were shown to a user. - name: clicks type: INTEGER mode: NULLABLE - description: The number of times a user clicked at least one search result link to the site. + description: The number of times a user clicked at least one search result link + to the site. - name: average_top_position type: FLOAT mode: NULLABLE - description: The average top position of the site in the search results, where `1` is the topmost position. + description: The average top position of the site in the search results, where `1` + is the topmost position. diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/view.sql 2024-08-07 19:54:32.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/view.sql 2024-08-07 19:55:18.000000000 +0000 @@ -1,63 +1,6 @@ CREATE OR REPLACE VIEW `moz-fx-data-shared-prod.google_search_console.search_impressions_by_site` AS -WITH search_impressions_union AS ( - SELECT - `date`, - site_url, - site_domain_name, - query, - FALSE AS is_anonymized, - search_type, - user_country_code, - device_type, - impressions, - clicks, - average_top_position - FROM - `moz-fx-data-shared-prod.google_search_console_derived.search_impressions_by_site_v1` - WHERE - CASE - WHEN site_domain_name IN ( - 'addons.mozilla.org', - 'blog.mozilla.org', - 'getpocket.com', - 'support.mozilla.org', - 'www.mozilla.org' - ) - THEN `date` < '2023-08-01' - WHEN site_domain_name = 'developer.mozilla.org' - THEN `date` < '2024-04-10' - ELSE FALSE - END - UNION ALL - SELECT - `date`, - site_url, - site_domain_name, - query, - is_anonymized, - search_type, - user_country_code, - device_type, - impressions, - clicks, - average_top_position - FROM - `moz-fx-data-shared-prod.google_search_console_derived.search_impressions_by_site_v2` - WHERE - CASE - WHEN site_domain_name IN ( - 'addons.mozilla.org', - 'blog.mozilla.org', - 'getpocket.com', - 'support.mozilla.org', - 'www.mozilla.org' - ) - THEN `date` >= '2023-08-01' - ELSE TRUE - END -) SELECT search_impressions.`date`, search_impressions.site_url, @@ -79,7 +22,7 @@ search_impressions.clicks, search_impressions.average_top_position FROM - search_impressions_union AS search_impressions + `moz-fx-data-shared-prod.google_search_console_derived.search_impressions_by_site_v2` AS search_impressions LEFT JOIN `moz-fx-data-shared-prod.static.country_codes_v1` AS user_country ON search_impressions.user_country_code = user_country.code_3 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console_derived/search_impressions_by_page_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console_derived/search_impressions_by_page_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console_derived/search_impressions_by_page_v1/metadata.yaml 2024-08-07 19:56:26.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console_derived/search_impressions_by_page_v1/metadata.yaml 2024-08-07 20:12:50.000000000 +0000 @@ -8,7 +8,9 @@ * support.mozilla.org * www.mozilla.org - Anonymized search queries aren't included. + The data has the following limitations: + * For each site only the top ~50,000 rows of data per day per search type is included (prioritized by number of clicks). + * Anonymized search queries aren't included. For the developer.mozilla.org domain, we stopped syncing Google Search Console data with Fivetran in May 2024. See https://bugzilla.mozilla.org/show_bug.cgi?id=1890816. @@ -22,6 +24,7 @@ owner1: srose scheduling: date_partition_parameter: date + date_partition_offset: -7 bigquery: time_partitioning: type: day diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console_derived/search_impressions_by_site_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console_derived/search_impressions_by_site_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console_derived/search_impressions_by_site_v1/metadata.yaml 2024-08-07 19:56:26.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console_derived/search_impressions_by_site_v1/metadata.yaml 2024-08-07 20:12:50.000000000 +0000 @@ -8,7 +8,9 @@ * support.mozilla.org * www.mozilla.org - Anonymized search queries aren't included. + The data has the following limitations: + * For each site only the top ~50,000 rows of data per day per search type is included (prioritized by number of clicks). + * Anonymized search queries aren't included. For the developer.mozilla.org domain, we stopped syncing Google Search Console data with Fivetran in May 2024. See https://bugzilla.mozilla.org/show_bug.cgi?id=1890816. @@ -22,6 +24,7 @@ owner1: srose scheduling: date_partition_parameter: date + date_partition_offset: -7 bigquery: time_partitioning: type: day diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/event_monitoring_live_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/event_monitoring_live_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/event_monitoring_live_v1/metadata.yaml 2024-08-07 19:55:44.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/event_monitoring_live_v1/metadata.yaml 2024-08-07 20:12:52.000000000 +0000 @@ -1,10 +1,19 @@ +friendly_name: Event Monitoring Live +description: |- + Materialized view of experimentation related events + coming from monitor_frontend. +owners: +- ascholtz@mozilla.com +- akomar@mozilla.com +labels: + materialized_view: true + owner1: ascholtz + owner2: akomar +bigquery: null workgroup_access: - role: roles/bigquery.dataViewer members: - workgroup:mozilla-confidential - - workgroup:dataops-managed/external-fides - -# Generated by bigquery_etl.dependency references: materialized_view.sql: - moz-fx-data-shared-prod.monitor_frontend_live.events_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/monitor_dashboard_user_journey_funnels_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/monitor_dashboard_user_journey_funnels_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/monitor_dashboard_user_journey_funnels_v1/metadata.yaml 2024-08-07 19:55:44.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/monitor_dashboard_user_journey_funnels_v1/metadata.yaml 2024-08-07 20:12:52.000000000 +0000 @@ -1,10 +1,26 @@ +friendly_name: Monitor Dashboard User Journey Funnels +description: |- + Please provide a description for the query +owners: +- ksiegler@mozilla.org +labels: + incremental: true + dag: bqetl_generated_funnels + owner1: ksiegler +scheduling: + dag_name: bqetl_generated_funnels +bigquery: + time_partitioning: + type: day + field: submission_date + require_partition_filter: false + expiration_days: null + range_partitioning: null + clustering: null workgroup_access: - role: roles/bigquery.dataViewer members: - workgroup:mozilla-confidential - - workgroup:dataops-managed/external-fides - -# Generated by bigquery_etl.dependency references: query.sql: - mozdata.monitor_frontend.events_unnested diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitoring_derived/table_partition_expirations_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitoring_derived/table_partition_expirations_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitoring_derived/table_partition_expirations_v1/query.sql 2024-08-07 19:55:11.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitoring_derived/table_partition_expirations_v1/query.sql 2024-08-07 20:03:01.000000000 +0000 @@ -1,4 +1,4 @@ -WITH first_partition_mlhackweek_search_stable AS ( +WITH first_partition_relay_backend_stable AS ( SELECT table_catalog, table_schema, @@ -6,25 +6,25 @@ PARSE_DATE("%Y%m%d", partition_id) AS first_partition_current, total_rows AS first_partition_row_count, FROM - `moz-fx-data-shared-prod.mlhackweek_search_stable.INFORMATION_SCHEMA.PARTITIONS` + `moz-fx-data-shared-prod.relay_backend_stable.INFORMATION_SCHEMA.PARTITIONS` WHERE partition_id != '__NULL__' QUALIFY ROW_NUMBER() OVER (PARTITION BY table_name ORDER BY partition_id) = 1 ), -first_non_empty_partition_mlhackweek_search_stable AS ( +first_non_empty_partition_relay_backend_stable AS ( SELECT table_name, PARSE_DATE("%Y%m%d", MIN(partition_id)) AS first_non_empty_partition_current, FROM - `moz-fx-data-shared-prod.mlhackweek_search_stable.INFORMATION_SCHEMA.PARTITIONS` + `moz-fx-data-shared-prod.relay_backend_stable.INFORMATION_SCHEMA.PARTITIONS` WHERE partition_id != '__NULL__' AND total_rows > 0 GROUP BY table_name ), -first_partition_bedrock_stable AS ( +first_partition_firefox_desktop_background_tasks_stable AS ( SELECT table_catalog, table_schema, @@ -32,25 +32,25 @@ PARSE_DATE("%Y%m%d", partition_id) AS first_partition_current, total_rows AS first_partition_row_count, FROM - `moz-fx-data-shared-prod.bedrock_stable.INFORMATION_SCHEMA.PARTITIONS` + `moz-fx-data-shared-prod.firefox_desktop_background_tasks_stable.INFORMATION_SCHEMA.PARTITIONS` WHERE partition_id != '__NULL__' QUALIFY ROW_NUMBER() OVER (PARTITION BY table_name ORDER BY partition_id) = 1 ), -first_non_empty_partition_bedrock_stable AS ( +first_non_empty_partition_firefox_desktop_background_tasks_stable AS ( SELECT table_name, PARSE_DATE("%Y%m%d", MIN(partition_id)) AS first_non_empty_partition_current, FROM - `moz-fx-data-shared-prod.bedrock_stable.INFORMATION_SCHEMA.PARTITIONS` + `moz-fx-data-shared-prod.firefox_desktop_background_tasks_stable.INFORMATION_SCHEMA.PARTITIONS` WHERE partition_id != '__NULL__' AND total_rows > 0 GROUP BY table_name ), -first_partition_default_browser_agent_stable AS ( +first_partition_firefox_translations_stable AS ( SELECT table_catalog, table_schema, @@ -58,25 +58,25 @@ PARSE_DATE("%Y%m%d", partition_id) AS first_partition_current, total_rows AS first_partition_row_count, FROM - `moz-fx-data-shared-prod.default_browser_agent_stable.INFORMATION_SCHEMA.PARTITIONS` + `moz-fx-data-shared-prod.firefox_translations_stable.INFORMATION_SCHEMA.PARTITIONS` WHERE partition_id != '__NULL__' QUALIFY ROW_NUMBER() OVER (PARTITION BY table_name ORDER BY partition_id) = 1 ), -first_non_empty_partition_default_browser_agent_stable AS ( +first_non_empty_partition_firefox_translations_stable AS ( SELECT table_name, PARSE_DATE("%Y%m%d", MIN(partition_id)) AS first_non_empty_partition_current, FROM - `moz-fx-data-shared-prod.default_browser_agent_stable.INFORMATION_SCHEMA.PARTITIONS` + `moz-fx-data-shared-prod.firefox_translations_stable.INFORMATION_SCHEMA.PARTITIONS` WHERE partition_id != '__NULL__' AND total_rows > 0 GROUP BY table_name ), -first_partition_moso_mastodon_backend_stable AS ( +first_partition_contextual_services_stable AS ( SELECT table_catalog, table_schema, @@ -84,25 +84,25 @@ PARSE_DATE("%Y%m%d", partition_id) AS first_partition_current, total_rows AS first_partition_row_count, FROM - `moz-fx-data-shared-prod.moso_mastodon_backend_stable.INFORMATION_SCHEMA.PARTITIONS` + `moz-fx-data-shared-prod.contextual_services_stable.INFORMATION_SCHEMA.PARTITIONS` WHERE partition_id != '__NULL__' QUALIFY ROW_NUMBER() OVER (PARTITION BY table_name ORDER BY partition_id) = 1 ), -first_non_empty_partition_moso_mastodon_backend_stable AS ( +first_non_empty_partition_contextual_services_stable AS ( SELECT table_name, PARSE_DATE("%Y%m%d", MIN(partition_id)) AS first_non_empty_partition_current, FROM - `moz-fx-data-shared-prod.moso_mastodon_backend_stable.INFORMATION_SCHEMA.PARTITIONS` + `moz-fx-data-shared-prod.contextual_services_stable.INFORMATION_SCHEMA.PARTITIONS` WHERE partition_id != '__NULL__' AND total_rows > 0 GROUP BY table_name ), -first_partition_org_mozilla_ios_firefoxbeta_stable AS ( +first_partition_org_mozilla_fennec_aurora_stable AS ( SELECT table_catalog, table_schema, @@ -110,25 +110,25 @@ PARSE_DATE("%Y%m%d", partition_id) AS first_partition_current, total_rows AS first_partition_row_count, FROM - `moz-fx-data-shared-prod.org_mozilla_ios_firefoxbeta_stable.INFORMATION_SCHEMA.PARTITIONS` + `moz-fx-data-shared-prod.org_mozilla_fennec_aurora_stable.INFORMATION_SCHEMA.PARTITIONS` WHERE partition_id != '__NULL__' QUALIFY ROW_NUMBER() OVER (PARTITION BY table_name ORDER BY partition_id) = 1 ), -first_non_empty_partition_org_mozilla_ios_firefoxbeta_stable AS ( +first_non_empty_partition_org_mozilla_fennec_aurora_stable AS ( SELECT table_name, PARSE_DATE("%Y%m%d", MIN(partition_id)) AS first_non_empty_partition_current, FROM - `moz-fx-data-shared-prod.org_mozilla_ios_firefoxbeta_stable.INFORMATION_SCHEMA.PARTITIONS` + `moz-fx-data-shared-prod.org_mozilla_fennec_aurora_stable.INFORMATION_SCHEMA.PARTITIONS` WHERE partition_id != '__NULL__' AND total_rows > 0 GROUP BY table_name ), -first_partition_firefox_installer_stable AS ( +first_partition_regrets_reporter_ucs_stable AS ( SELECT table_catalog, table_schema, @@ -136,25 +136,25 @@ PARSE_DATE("%Y%m%d", partition_id) AS first_partition_current, total_rows AS first_partition_row_count, FROM - `moz-fx-data-shared-prod.firefox_installer_stable.INFORMATION_SCHEMA.PARTITIONS` + `moz-fx-data-shared-prod.regrets_reporter_ucs_stable.INFORMATION_SCHEMA.PARTITIONS` WHERE partition_id != '__NULL__' QUALIFY ROW_NUMBER() OVER (PARTITION BY table_name ORDER BY partition_id) = 1 ), -first_non_empty_partition_firefox_installer_stable AS ( +first_non_empty_partition_regrets_reporter_ucs_stable AS ( SELECT table_name, PARSE_DATE("%Y%m%d", MIN(partition_id)) AS first_non_empty_partition_current, FROM - `moz-fx-data-shared-prod.firefox_installer_stable.INFORMATION_SCHEMA.PARTITIONS` + `moz-fx-data-shared-prod.regrets_reporter_ucs_stable.INFORMATION_SCHEMA.PARTITIONS` WHERE partition_id != '__NULL__' AND total_rows > 0 GROUP BY table_name ), -first_partition_org_mozilla_ios_firefoxvpn_network_extension_stable AS ( +first_partition_org_mozilla_firefox_beta_stable AS ( SELECT table_catalog, table_schema, @@ -162,25 +162,25 @@ PARSE_DATE("%Y%m%d", partition_id) AS first_partition_current, total_rows AS first_partition_row_count, FROM - `moz-fx-data-shared-prod.org_mozilla_ios_firefoxvpn_network_extension_stable.INFORMATION_SCHEMA.PARTITIONS` + `moz-fx-data-shared-prod.org_mozilla_firefox_beta_stable.INFORMATION_SCHEMA.PARTITIONS` WHERE partition_id != '__NULL__' QUALIFY ROW_NUMBER() OVER (PARTITION BY table_name ORDER BY partition_id) = 1 ), -first_non_empty_partition_org_mozilla_ios_firefoxvpn_network_extension_stable AS ( +first_non_empty_partition_org_mozilla_firefox_beta_stable AS ( SELECT table_name, PARSE_DATE("%Y%m%d", MIN(partition_id)) AS first_non_empty_partition_current, FROM - `moz-fx-data-shared-prod.org_mozilla_ios_firefoxvpn_network_extension_stable.INFORMATION_SCHEMA.PARTITIONS` + `moz-fx-data-shared-prod.org_mozilla_firefox_beta_stable.INFORMATION_SCHEMA.PARTITIONS` WHERE partition_id != '__NULL__' AND total_rows > 0 GROUP BY table_name ), -first_partition_org_mozilla_ios_focus_stable AS ( +first_partition_mdn_yari_stable AS ( ```

⚠️ Only part of the diff is displayed.

Link to full diff

dataops-ci-bot commented 2 months ago

Integration report for "Merge branch 'main' into DENG-4329-GSC-Fivetran"

sql.diff

Click to expand! ```diff Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console: limited_historical_search_impressions_by_page Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console: limited_historical_search_impressions_by_site diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-marketing-prod/google_search_console/search_impressions_by_page/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-marketing-prod/google_search_console/search_impressions_by_page/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-marketing-prod/google_search_console/search_impressions_by_page/schema.yaml 2024-08-07 20:53:20.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-marketing-prod/google_search_console/search_impressions_by_page/schema.yaml 2024-08-07 20:53:15.000000000 +0000 @@ -81,9 +81,7 @@ - name: has_good_page_experience type: BOOLEAN mode: NULLABLE - description: |- - Whether Google Search considers the page to be providing a good page experience. - This will be null when the source data wasn't exported directly to BigQuery by Google. + description: Whether Google Search considers the page to be providing a good page experience. - name: search_type type: STRING mode: NULLABLE @@ -98,9 +96,7 @@ - name: search_appearance type: STRING mode: NULLABLE - description: |- - How the search result appeared (e.g. normal result, translated result, video). - This will be null when the source data wasn't exported directly to BigQuery by Google. + description: How the search result appeared (e.g. normal result, translated result, video). - name: user_country_code type: STRING mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_page/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_page/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_page/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_page/metadata.yaml 2024-08-07 20:55:08.000000000 +0000 @@ -0,0 +1,28 @@ +friendly_name: Limited Historical Search Impressions By Page +description: |- + Limited historical Google Search impressions aggregated by page for the following domains: + * addons.mozilla.org (between 2021-03-29 and 2023-08-08) + * blog.mozilla.org (between 2021-03-29 and 2023-08-08) + * developer.mozilla.org (between 2022-12-18 and 2024-04-29) + * getpocket.com (between 2021-03-29 and 2023-08-08) + * support.mozilla.org (between 2021-03-29 and 2023-08-08) + * www.mozilla.org (between 2021-03-29 and 2023-08-08) + + The data has the following limitations: + * For each site only data for specific historical dates is included as noted above. + * For each site only the top ~50,000 rows of data per day per search type is included (prioritized by number of clicks). + * Anonymized search queries aren't included. +owners: +- srose@mozilla.com +labels: + owner1: srose +bigquery: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: + view.sql: + - moz-fx-data-shared-prod.google_search_console_derived.search_impressions_by_page_v1 + - moz-fx-data-shared-prod.static.country_codes_v1 + - moz-fx-data-shared-prod.static.language_codes_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_page/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_page/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_page/schema.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_page/schema.yaml 2024-08-07 20:53:15.000000000 +0000 @@ -0,0 +1,97 @@ +fields: +- name: date + type: DATE + mode: NULLABLE + description: The day on which the search occurred (Pacific Time). +- name: site_url + type: STRING + mode: NULLABLE + description: |- + For domain properties, this will be `sc-domain:` followed by the domain name. + For URL-prefix properties, it will be the full URL of the property definition. +- name: site_domain_name + type: STRING + mode: NULLABLE + description: Domain name of the site. +- name: page_url + type: STRING + mode: NULLABLE + description: The final page URL linked by a search result after any skip redirects. +- name: page_domain_name + type: STRING + mode: NULLABLE + description: Domain name of the page URL. +- name: page_path + type: STRING + mode: NULLABLE + description: The path part of the page URL. +- name: localized_site_code + type: STRING + mode: NULLABLE + description: Localized site code such as `en-US` or `de` found in the first segment of the page URL path (if any). +- name: localized_site + type: STRING + mode: NULLABLE + description: Description of the localized site language and/or country based on `localized_site_code` (if any). +- name: localized_site_language_code + type: STRING + mode: NULLABLE + description: Localized site language code in ISO-639-alpha-2 format found in the first segment of the page URL path (if any). +- name: localized_site_language + type: STRING + mode: NULLABLE + description: Localized site language based on `localized_site_language_code` (if any). +- name: query + type: STRING + mode: NULLABLE + description: The search query. +- name: query_type + type: STRING + mode: NULLABLE + description: |- + Type of search query: + * Brand: Query contained one or more Mozilla brand keywords. + * Non-Brand: Query didn't contain any Mozilla brand keywords. + * Unknown: Query couldn't be classified. +- name: search_type + type: STRING + mode: NULLABLE + description: |- + Where the link was seen by the user: + * Web: In Google Search's default "All" tab. + * Image: In Google Search's "Images" tab. + * Video: In Google Search's "Videos" tab. + * News: In Google Search's "News" tab. +- name: user_country_code + type: STRING + mode: NULLABLE + description: Country from which the user was searching, in ISO-3166-1-alpha-3 format. +- name: user_country + type: STRING + mode: NULLABLE + description: Country from which the user was searching. +- name: user_region + type: STRING + mode: NULLABLE + description: Region from which the user was searching. +- name: user_subregion + type: STRING + mode: NULLABLE + description: Sub-region from which the user was searching. +- name: device_type + type: STRING + mode: NULLABLE + description: |- + The type of device on which the user was searching: Desktop, Mobile, or Tablet. +- name: impressions + type: INTEGER + mode: NULLABLE + description: The number of times that search results with a link to the page were shown to a user. +- name: clicks + type: INTEGER + mode: NULLABLE + description: The number of times a user clicked a search result link to the page. +- name: average_position + type: FLOAT + mode: NULLABLE + description: The average position of the page in the search results, where `1` is the topmost position. diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_page/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_page/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_page/view.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_page/view.sql 2024-08-07 20:53:15.000000000 +0000 @@ -0,0 +1,66 @@ +CREATE OR REPLACE VIEW + `moz-fx-data-shared-prod.google_search_console.limited_historical_search_impressions_by_page` +AS +SELECT + search_impressions.date, + search_impressions.site_url, + search_impressions.site_domain_name, + search_impressions.page_url, + search_impressions.page_domain_name, + search_impressions.page_path, + search_impressions.localized_site_code, + CONCAT( + COALESCE(localized_site_language.name, search_impressions.localized_site_language_code), + COALESCE( + CONCAT( + ' - ', + COALESCE(localized_site_country.name, search_impressions.localized_site_country_code) + ), + '' + ) + ) AS localized_site, + search_impressions.localized_site_language_code, + COALESCE( + localized_site_language.name, + search_impressions.localized_site_language_code + ) AS localized_site_language, + search_impressions.query, + mozfun.google_search_console.classify_site_query( + search_impressions.site_domain_name, + search_impressions.query, + search_impressions.search_type + ) AS query_type, + search_impressions.search_type, + search_impressions.user_country_code, + COALESCE(user_country.name, search_impressions.user_country_code) AS user_country, + user_country.region_name AS user_region, + user_country.subregion_name AS user_subregion, + search_impressions.device_type, + search_impressions.impressions, + search_impressions.clicks, + search_impressions.average_position +FROM + `moz-fx-data-shared-prod.google_search_console_derived.search_impressions_by_page_v1` AS search_impressions +LEFT JOIN + `moz-fx-data-shared-prod.static.language_codes_v1` AS localized_site_language + ON search_impressions.localized_site_language_code = localized_site_language.code_2 +LEFT JOIN + `moz-fx-data-shared-prod.static.country_codes_v1` AS localized_site_country + ON search_impressions.localized_site_country_code = localized_site_country.code +LEFT JOIN + `moz-fx-data-shared-prod.static.country_codes_v1` AS user_country + ON search_impressions.user_country_code = user_country.code_3 +WHERE + -- Exclude the last two days of data for each site because the data for those days is incomplete. + CASE + WHEN site_domain_name IN ( + 'addons.mozilla.org', + 'blog.mozilla.org', + 'getpocket.com', + 'support.mozilla.org', + 'www.mozilla.org' + ) + THEN `date` <= '2023-08-08' + WHEN site_domain_name = 'developer.mozilla.org' + THEN `date` <= '2024-04-29' + END diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_site/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_site/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_site/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_site/metadata.yaml 2024-08-07 20:55:08.000000000 +0000 @@ -0,0 +1,27 @@ +friendly_name: Limited Historical Search Impressions By Site +description: |- + Limited historical Google Search impressions aggregated by site for the following domains: + * addons.mozilla.org (between 2021-03-29 and 2023-08-08) + * blog.mozilla.org (between 2021-03-29 and 2023-08-08) + * developer.mozilla.org (between 2022-12-18 and 2024-04-29) + * getpocket.com (between 2021-03-29 and 2023-08-08) + * support.mozilla.org (between 2021-03-29 and 2023-08-08) + * www.mozilla.org (between 2021-03-29 and 2023-08-08) + + The data has the following limitations: + * For each site only data for specific historical dates is included as noted above. + * For each site only the top ~50,000 rows of data per day per search type is included (prioritized by number of clicks). + * Anonymized search queries aren't included. +owners: +- srose@mozilla.com +labels: + owner1: srose +bigquery: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: + view.sql: + - moz-fx-data-shared-prod.google_search_console_derived.search_impressions_by_site_v1 + - moz-fx-data-shared-prod.static.country_codes_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_site/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_site/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_site/schema.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_site/schema.yaml 2024-08-07 20:53:15.000000000 +0000 @@ -0,0 +1,69 @@ +fields: +- name: date + type: DATE + mode: NULLABLE + description: The day on which the search occurred (Pacific Time). +- name: site_url + type: STRING + mode: NULLABLE + description: |- + For domain properties, this will be `sc-domain:` followed by the domain name. + For URL-prefix properties, it will be the full URL of the property definition. +- name: site_domain_name + type: STRING + mode: NULLABLE + description: Domain name of the site. +- name: query + type: STRING + mode: NULLABLE + description: The search query. +- name: query_type + type: STRING + mode: NULLABLE + description: |- + Type of search query: + * Brand: Query contained one or more Mozilla brand keywords. + * Non-Brand: Query didn't contain any Mozilla brand keywords. + * Unknown: Query couldn't be classified. +- name: search_type + type: STRING + mode: NULLABLE + description: |- + Where the link was seen by the user: + * Web: In Google Search's default "All" tab. + * Image: In Google Search's "Images" tab. + * Video: In Google Search's "Videos" tab. + * News: In Google Search's "News" tab. +- name: user_country_code + type: STRING + mode: NULLABLE + description: Country from which the user was searching, in ISO-3166-1-alpha-3 format. +- name: user_country + type: STRING + mode: NULLABLE + description: Country from which the user was searching. +- name: user_region + type: STRING + mode: NULLABLE + description: Region from which the user was searching. +- name: user_subregion + type: STRING + mode: NULLABLE + description: Sub-region from which the user was searching. +- name: device_type + type: STRING + mode: NULLABLE + description: |- + The type of device on which the user was searching: Desktop, Mobile, or Tablet. +- name: impressions + type: INTEGER + mode: NULLABLE + description: The number of times that search results with at least one link to the site were shown to a user. +- name: clicks + type: INTEGER + mode: NULLABLE + description: The number of times a user clicked at least one search result link to the site. +- name: average_top_position + type: FLOAT + mode: NULLABLE + description: The average top position of the site in the search results, where `1` is the topmost position. diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_site/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_site/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_site/view.sql 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/limited_historical_search_impressions_by_site/view.sql 2024-08-07 20:53:15.000000000 +0000 @@ -0,0 +1,41 @@ +CREATE OR REPLACE VIEW + `moz-fx-data-shared-prod.google_search_console.limited_historical_search_impressions_by_site` +AS +SELECT + search_impressions.`date`, + search_impressions.site_url, + search_impressions.site_domain_name, + search_impressions.query, + mozfun.google_search_console.classify_site_query( + search_impressions.site_domain_name, + search_impressions.query, + search_impressions.search_type + ) AS query_type, + search_impressions.search_type, + search_impressions.user_country_code, + COALESCE(user_country.name, search_impressions.user_country_code) AS user_country, + user_country.region_name AS user_region, + user_country.subregion_name AS user_subregion, + search_impressions.device_type, + search_impressions.impressions, + search_impressions.clicks, + search_impressions.average_top_position +FROM + `moz-fx-data-shared-prod.google_search_console_derived.search_impressions_by_site_v1` AS search_impressions +LEFT JOIN + `moz-fx-data-shared-prod.static.country_codes_v1` AS user_country + ON search_impressions.user_country_code = user_country.code_3 +WHERE + -- Exclude the last two days of data for each site because the data for those days is incomplete. + CASE + WHEN site_domain_name IN ( + 'addons.mozilla.org', + 'blog.mozilla.org', + 'getpocket.com', + 'support.mozilla.org', + 'www.mozilla.org' + ) + THEN `date` <= '2023-08-08' + WHEN site_domain_name = 'developer.mozilla.org' + THEN `date` <= '2024-04-29' + END diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/metadata.yaml 2024-08-07 20:55:12.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/metadata.yaml 2024-08-07 20:55:08.000000000 +0000 @@ -1,22 +1,12 @@ friendly_name: Search Impressions By Page description: |- Google Search impressions aggregated by page for the following domains: - * addons.mozilla.org - * blog.mozilla.org - * developer.mozilla.org - * getpocket.com - * support.mozilla.org - * www.mozilla.org - - For the developer.mozilla.org domain: - * Records from 2024-04-10 onward are from source data exported directly to BigQuery by Google. - * Records before 2024-04-10 are from source data synced to BigQuery by Fivetran. - - For the other domains: - * Records from 2023-08-01 onward are from source data exported directly to BigQuery by Google. - * Records before 2023-08-01 are from source data synced to BigQuery by Fivetran. - - Anonymized search queries, and Discover and Google News search impressions are only included if the source data was exported directly to BigQuery by Google. + * addons.mozilla.org (since 2023-07-11) + * blog.mozilla.org (since 2023-07-11) + * developer.mozilla.org (since 2024-04-10) + * getpocket.com (since 2023-07-24) + * support.mozilla.org (since 2023-07-11) + * www.mozilla.org (since 2023-07-11) owners: - srose@mozilla.com labels: @@ -28,7 +18,6 @@ - workgroup:mozilla-confidential references: view.sql: - - moz-fx-data-shared-prod.google_search_console_derived.search_impressions_by_page_v1 - moz-fx-data-shared-prod.google_search_console_derived.search_impressions_by_page_v2 - moz-fx-data-shared-prod.static.country_codes_v1 - moz-fx-data-shared-prod.static.language_codes_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml 2024-08-07 20:53:20.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml 2024-08-07 20:53:15.000000000 +0000 @@ -81,9 +81,7 @@ - name: has_good_page_experience type: BOOLEAN mode: NULLABLE - description: |- - Whether Google Search considers the page to be providing a good page experience. - This will be null when the source data wasn't exported directly to BigQuery by Google. + description: Whether Google Search considers the page to be providing a good page experience. - name: search_type type: STRING mode: NULLABLE @@ -98,9 +96,7 @@ - name: search_appearance type: STRING mode: NULLABLE - description: |- - How the search result appeared (e.g. normal result, translated result, video). - This will be null when the source data wasn't exported directly to BigQuery by Google. + description: How the search result appeared (e.g. normal result, translated result, video). - name: user_country_code type: STRING mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/view.sql 2024-08-07 20:53:20.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/view.sql 2024-08-07 20:53:15.000000000 +0000 @@ -1,79 +1,6 @@ CREATE OR REPLACE VIEW `moz-fx-data-shared-prod.google_search_console.search_impressions_by_page` AS -WITH search_impressions_union AS ( - SELECT - `date`, - site_url, - site_domain_name, - page_url, - page_domain_name, - page_path, - localized_site_code, - localized_site_language_code, - localized_site_country_code, - query, - FALSE AS is_anonymized, - CAST(NULL AS BOOLEAN) AS has_good_page_experience, - search_type, - CAST(NULL AS STRING) AS search_appearance, - user_country_code, - device_type, - impressions, - clicks, - average_position - FROM - `moz-fx-data-shared-prod.google_search_console_derived.search_impressions_by_page_v1` - WHERE - CASE - WHEN site_domain_name IN ( - 'addons.mozilla.org', - 'blog.mozilla.org', - 'getpocket.com', - 'support.mozilla.org', - 'www.mozilla.org' - ) - THEN `date` < '2023-08-01' - WHEN site_domain_name = 'developer.mozilla.org' - THEN `date` < '2024-04-10' - ELSE FALSE - END - UNION ALL - SELECT - `date`, - site_url, - site_domain_name, - page_url, - page_domain_name, - page_path, - localized_site_code, - localized_site_language_code, - localized_site_country_code, - query, - is_anonymized, - has_good_page_experience, - search_type, - search_appearance, - user_country_code, - device_type, - impressions, - clicks, - average_position - FROM - `moz-fx-data-shared-prod.google_search_console_derived.search_impressions_by_page_v2` - WHERE - CASE - WHEN site_domain_name IN ( - 'addons.mozilla.org', - 'blog.mozilla.org', - 'getpocket.com', - 'support.mozilla.org', - 'www.mozilla.org' - ) - THEN `date` >= '2023-08-01' - ELSE TRUE - END -) SELECT search_impressions.date, search_impressions.site_url, @@ -116,7 +43,7 @@ search_impressions.clicks, search_impressions.average_position FROM - search_impressions_union AS search_impressions + `moz-fx-data-shared-prod.google_search_console_derived.search_impressions_by_page_v2` AS search_impressions LEFT JOIN `moz-fx-data-shared-prod.static.language_codes_v1` AS localized_site_language ON search_impressions.localized_site_language_code = localized_site_language.code_2 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/metadata.yaml 2024-08-07 20:55:12.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/metadata.yaml 2024-08-07 20:55:08.000000000 +0000 @@ -1,22 +1,12 @@ friendly_name: Search Impressions By Site description: |- Google Search impressions aggregated by site for the following domains: - * addons.mozilla.org - * blog.mozilla.org - * developer.mozilla.org - * getpocket.com - * support.mozilla.org - * www.mozilla.org - - For the developer.mozilla.org domain: - * Records from 2024-04-10 onward are from source data exported directly to BigQuery by Google. - * Records before 2024-04-10 are from source data synced to BigQuery by Fivetran. - - For the other domains: - * Records from 2023-08-01 onward are from source data exported directly to BigQuery by Google. - * Records before 2023-08-01 are from source data synced to BigQuery by Fivetran. - - Anonymized search queries are only included if the source data was exported directly to BigQuery by Google. + * addons.mozilla.org (since 2023-07-11) + * blog.mozilla.org (since 2023-07-11) + * developer.mozilla.org (since 2024-04-10) + * getpocket.com (since 2023-07-24) + * support.mozilla.org (since 2023-07-11) + * www.mozilla.org (since 2023-07-11) owners: - srose@mozilla.com labels: @@ -28,6 +18,5 @@ - workgroup:mozilla-confidential references: view.sql: - - moz-fx-data-shared-prod.google_search_console_derived.search_impressions_by_site_v1 - moz-fx-data-shared-prod.google_search_console_derived.search_impressions_by_site_v2 - moz-fx-data-shared-prod.static.country_codes_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/view.sql 2024-08-07 20:53:20.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/view.sql 2024-08-07 20:53:15.000000000 +0000 @@ -1,63 +1,6 @@ CREATE OR REPLACE VIEW `moz-fx-data-shared-prod.google_search_console.search_impressions_by_site` AS -WITH search_impressions_union AS ( - SELECT - `date`, - site_url, - site_domain_name, - query, - FALSE AS is_anonymized, - search_type, - user_country_code, - device_type, - impressions, - clicks, - average_top_position - FROM - `moz-fx-data-shared-prod.google_search_console_derived.search_impressions_by_site_v1` - WHERE - CASE - WHEN site_domain_name IN ( - 'addons.mozilla.org', - 'blog.mozilla.org', - 'getpocket.com', - 'support.mozilla.org', - 'www.mozilla.org' - ) - THEN `date` < '2023-08-01' - WHEN site_domain_name = 'developer.mozilla.org' - THEN `date` < '2024-04-10' - ELSE FALSE - END - UNION ALL - SELECT - `date`, - site_url, - site_domain_name, - query, - is_anonymized, - search_type, - user_country_code, - device_type, - impressions, - clicks, - average_top_position - FROM - `moz-fx-data-shared-prod.google_search_console_derived.search_impressions_by_site_v2` - WHERE - CASE - WHEN site_domain_name IN ( - 'addons.mozilla.org', - 'blog.mozilla.org', - 'getpocket.com', - 'support.mozilla.org', - 'www.mozilla.org' - ) - THEN `date` >= '2023-08-01' - ELSE TRUE - END -) SELECT search_impressions.`date`, search_impressions.site_url, @@ -79,7 +22,7 @@ search_impressions.clicks, search_impressions.average_top_position FROM - search_impressions_union AS search_impressions + `moz-fx-data-shared-prod.google_search_console_derived.search_impressions_by_site_v2` AS search_impressions LEFT JOIN `moz-fx-data-shared-prod.static.country_codes_v1` AS user_country ON search_impressions.user_country_code = user_country.code_3 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console_derived/search_impressions_by_page_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console_derived/search_impressions_by_page_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console_derived/search_impressions_by_page_v1/metadata.yaml 2024-08-07 20:55:15.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console_derived/search_impressions_by_page_v1/metadata.yaml 2024-08-07 20:55:08.000000000 +0000 @@ -8,7 +8,9 @@ * support.mozilla.org * www.mozilla.org - Anonymized search queries aren't included. + The data has the following limitations: + * For each site only the top ~50,000 rows of data per day per search type is included (prioritized by number of clicks). + * Anonymized search queries aren't included. For the developer.mozilla.org domain, we stopped syncing Google Search Console data with Fivetran in May 2024. See https://bugzilla.mozilla.org/show_bug.cgi?id=1890816. @@ -22,6 +24,7 @@ owner1: srose scheduling: date_partition_parameter: date + date_partition_offset: -7 bigquery: time_partitioning: type: day diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console_derived/search_impressions_by_site_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console_derived/search_impressions_by_site_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console_derived/search_impressions_by_site_v1/metadata.yaml 2024-08-07 20:55:15.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console_derived/search_impressions_by_site_v1/metadata.yaml 2024-08-07 20:55:08.000000000 +0000 @@ -8,7 +8,9 @@ * support.mozilla.org * www.mozilla.org - Anonymized search queries aren't included. + The data has the following limitations: + * For each site only the top ~50,000 rows of data per day per search type is included (prioritized by number of clicks). + * Anonymized search queries aren't included. For the developer.mozilla.org domain, we stopped syncing Google Search Console data with Fivetran in May 2024. See https://bugzilla.mozilla.org/show_bug.cgi?id=1890816. @@ -22,6 +24,7 @@ owner1: srose scheduling: date_partition_parameter: date + date_partition_offset: -7 bigquery: time_partitioning: type: day ```

Link to full diff