mozilla / bigquery-etl

Bigquery ETL
https://mozilla.github.io/bigquery-etl
Mozilla Public License 2.0
253 stars 100 forks source link

Fix search_revenue_levers_daily to pull from aggregates table for shredder mitigation #6005

Closed alekhyamoz closed 1 month ago

alekhyamoz commented 2 months ago

…shredder mitigation

Checklist for reviewer:

For modifications to schemas in restricted namespaces (see CODEOWNERS):

┆Issue is synchronized with this Jira Task

dataops-ci-bot commented 2 months ago

Integration report for "Fix search_revenue_levers_daily to pull from aggregates table for shredder mitigation"

sql.diff

Click to expand! ```diff diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_mobile_search.py /tmp/workspace/generated-sql/dags/bqetl_mobile_search.py --- /tmp/workspace/main-generated-sql/dags/bqetl_mobile_search.py 2024-08-01 22:05:22.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_mobile_search.py 2024-08-01 22:05:08.000000000 +0000 @@ -80,6 +80,20 @@ depends_on_past=False, ) + with TaskGroup( + "search_derived__mobile_search_aggregates__v1_external", + ) as search_derived__mobile_search_aggregates__v1_external: + ExternalTaskMarker( + task_id="bqetl_search_dashboard__wait_for_search_derived__mobile_search_aggregates__v1", + external_dag_id="bqetl_search_dashboard", + external_task_id="wait_for_search_derived__mobile_search_aggregates__v1", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=77400)).isoformat() }}", + ) + + search_derived__mobile_search_aggregates__v1_external.set_upstream( + search_derived__mobile_search_aggregates__v1 + ) + search_derived__mobile_search_clients_daily__v1 = bigquery_etl_query( task_id="search_derived__mobile_search_clients_daily__v1", destination_table="mobile_search_clients_daily_v1", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_search_dashboard.py /tmp/workspace/generated-sql/dags/bqetl_search_dashboard.py --- /tmp/workspace/main-generated-sql/dags/bqetl_search_dashboard.py 2024-08-01 22:05:22.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_search_dashboard.py 2024-08-01 22:05:08.000000000 +0000 @@ -83,10 +83,10 @@ pool="DATA_ENG_EXTERNALTASKSENSOR", ) - wait_for_checks__fail_org_mozilla_fenix_derived__baseline_clients_last_seen__v1 = ExternalTaskSensor( - task_id="wait_for_checks__fail_org_mozilla_fenix_derived__baseline_clients_last_seen__v1", - external_dag_id="bqetl_glean_usage", - external_task_id="fenix.checks__fail_org_mozilla_fenix_derived__baseline_clients_last_seen__v1", + 
wait_for_search_derived__mobile_search_aggregates__v1 = ExternalTaskSensor( + task_id="wait_for_search_derived__mobile_search_aggregates__v1", + external_dag_id="bqetl_mobile_search", + external_task_id="search_derived__mobile_search_aggregates__v1", execution_delta=datetime.timedelta(seconds=9000), check_existence=True, mode="reschedule", @@ -95,186 +95,6 @@ pool="DATA_ENG_EXTERNALTASKSENSOR", ) - wait_for_checks__fail_org_mozilla_fenix_nightly_derived__baseline_clients_last_seen__v1 = ExternalTaskSensor( - task_id="wait_for_checks__fail_org_mozilla_fenix_nightly_derived__baseline_clients_last_seen__v1", - external_dag_id="bqetl_glean_usage", - external_task_id="fenix.checks__fail_org_mozilla_fenix_nightly_derived__baseline_clients_last_seen__v1", - execution_delta=datetime.timedelta(seconds=9000), - check_existence=True, - mode="reschedule", - allowed_states=ALLOWED_STATES, - failed_states=FAILED_STATES, - pool="DATA_ENG_EXTERNALTASKSENSOR", - ) - - wait_for_checks__fail_org_mozilla_fennec_aurora_derived__baseline_clients_last_seen__v1 = ExternalTaskSensor( - task_id="wait_for_checks__fail_org_mozilla_fennec_aurora_derived__baseline_clients_last_seen__v1", - external_dag_id="bqetl_glean_usage", - external_task_id="fenix.checks__fail_org_mozilla_fennec_aurora_derived__baseline_clients_last_seen__v1", - execution_delta=datetime.timedelta(seconds=9000), - check_existence=True, - mode="reschedule", - allowed_states=ALLOWED_STATES, - failed_states=FAILED_STATES, - pool="DATA_ENG_EXTERNALTASKSENSOR", - ) - - wait_for_checks__fail_org_mozilla_firefox_beta_derived__baseline_clients_last_seen__v1 = ExternalTaskSensor( - task_id="wait_for_checks__fail_org_mozilla_firefox_beta_derived__baseline_clients_last_seen__v1", - external_dag_id="bqetl_glean_usage", - external_task_id="fenix.checks__fail_org_mozilla_firefox_beta_derived__baseline_clients_last_seen__v1", - execution_delta=datetime.timedelta(seconds=9000), - check_existence=True, - mode="reschedule", - 
allowed_states=ALLOWED_STATES, - failed_states=FAILED_STATES, - pool="DATA_ENG_EXTERNALTASKSENSOR", - ) - - wait_for_checks__fail_org_mozilla_firefox_derived__baseline_clients_last_seen__v1 = ExternalTaskSensor( - task_id="wait_for_checks__fail_org_mozilla_firefox_derived__baseline_clients_last_seen__v1", - external_dag_id="bqetl_glean_usage", - external_task_id="fenix.checks__fail_org_mozilla_firefox_derived__baseline_clients_last_seen__v1", - execution_delta=datetime.timedelta(seconds=9000), - check_existence=True, - mode="reschedule", - allowed_states=ALLOWED_STATES, - failed_states=FAILED_STATES, - pool="DATA_ENG_EXTERNALTASKSENSOR", - ) - - wait_for_checks__fail_org_mozilla_focus_beta_derived__baseline_clients_last_seen__v1 = ExternalTaskSensor( - task_id="wait_for_checks__fail_org_mozilla_focus_beta_derived__baseline_clients_last_seen__v1", - external_dag_id="bqetl_glean_usage", - external_task_id="focus_android.checks__fail_org_mozilla_focus_beta_derived__baseline_clients_last_seen__v1", - execution_delta=datetime.timedelta(seconds=9000), - check_existence=True, - mode="reschedule", - allowed_states=ALLOWED_STATES, - failed_states=FAILED_STATES, - pool="DATA_ENG_EXTERNALTASKSENSOR", - ) - - wait_for_checks__fail_org_mozilla_focus_derived__baseline_clients_last_seen__v1 = ExternalTaskSensor( - task_id="wait_for_checks__fail_org_mozilla_focus_derived__baseline_clients_last_seen__v1", - external_dag_id="bqetl_glean_usage", - external_task_id="focus_android.checks__fail_org_mozilla_focus_derived__baseline_clients_last_seen__v1", - execution_delta=datetime.timedelta(seconds=9000), - check_existence=True, - mode="reschedule", - allowed_states=ALLOWED_STATES, - failed_states=FAILED_STATES, - pool="DATA_ENG_EXTERNALTASKSENSOR", - ) - - wait_for_checks__fail_org_mozilla_focus_nightly_derived__baseline_clients_last_seen__v1 = ExternalTaskSensor( - task_id="wait_for_checks__fail_org_mozilla_focus_nightly_derived__baseline_clients_last_seen__v1", - 
external_dag_id="bqetl_glean_usage", - external_task_id="focus_android.checks__fail_org_mozilla_focus_nightly_derived__baseline_clients_last_seen__v1", - execution_delta=datetime.timedelta(seconds=9000), - check_existence=True, - mode="reschedule", - allowed_states=ALLOWED_STATES, - failed_states=FAILED_STATES, - pool="DATA_ENG_EXTERNALTASKSENSOR", - ) - - wait_for_checks__fail_org_mozilla_ios_fennec_derived__baseline_clients_last_seen__v1 = ExternalTaskSensor( - task_id="wait_for_checks__fail_org_mozilla_ios_fennec_derived__baseline_clients_last_seen__v1", - external_dag_id="bqetl_glean_usage", - external_task_id="firefox_ios.checks__fail_org_mozilla_ios_fennec_derived__baseline_clients_last_seen__v1", - execution_delta=datetime.timedelta(seconds=9000), - check_existence=True, - mode="reschedule", - allowed_states=ALLOWED_STATES, - failed_states=FAILED_STATES, - pool="DATA_ENG_EXTERNALTASKSENSOR", - ) - - wait_for_checks__fail_org_mozilla_ios_firefox_derived__baseline_clients_last_seen__v1 = ExternalTaskSensor( - task_id="wait_for_checks__fail_org_mozilla_ios_firefox_derived__baseline_clients_last_seen__v1", - external_dag_id="bqetl_glean_usage", - external_task_id="firefox_ios.checks__fail_org_mozilla_ios_firefox_derived__baseline_clients_last_seen__v1", - execution_delta=datetime.timedelta(seconds=9000), - check_existence=True, - mode="reschedule", - allowed_states=ALLOWED_STATES, - failed_states=FAILED_STATES, - pool="DATA_ENG_EXTERNALTASKSENSOR", - ) - - wait_for_checks__fail_org_mozilla_ios_firefoxbeta_derived__baseline_clients_last_seen__v1 = ExternalTaskSensor( - task_id="wait_for_checks__fail_org_mozilla_ios_firefoxbeta_derived__baseline_clients_last_seen__v1", - external_dag_id="bqetl_glean_usage", - external_task_id="firefox_ios.checks__fail_org_mozilla_ios_firefoxbeta_derived__baseline_clients_last_seen__v1", - execution_delta=datetime.timedelta(seconds=9000), - check_existence=True, - mode="reschedule", - allowed_states=ALLOWED_STATES, - 
failed_states=FAILED_STATES, - pool="DATA_ENG_EXTERNALTASKSENSOR", - ) - - wait_for_checks__fail_org_mozilla_ios_focus_derived__baseline_clients_last_seen__v1 = ExternalTaskSensor( - task_id="wait_for_checks__fail_org_mozilla_ios_focus_derived__baseline_clients_last_seen__v1", - external_dag_id="bqetl_glean_usage", - external_task_id="focus_ios.checks__fail_org_mozilla_ios_focus_derived__baseline_clients_last_seen__v1", - execution_delta=datetime.timedelta(seconds=9000), - check_existence=True, - mode="reschedule", - allowed_states=ALLOWED_STATES, - failed_states=FAILED_STATES, - pool="DATA_ENG_EXTERNALTASKSENSOR", - ) - - wait_for_checks__fail_org_mozilla_ios_klar_derived__baseline_clients_last_seen__v1 = ExternalTaskSensor( - task_id="wait_for_checks__fail_org_mozilla_ios_klar_derived__baseline_clients_last_seen__v1", - external_dag_id="bqetl_glean_usage", - external_task_id="klar_ios.checks__fail_org_mozilla_ios_klar_derived__baseline_clients_last_seen__v1", - execution_delta=datetime.timedelta(seconds=9000), - check_existence=True, - mode="reschedule", - allowed_states=ALLOWED_STATES, - failed_states=FAILED_STATES, - pool="DATA_ENG_EXTERNALTASKSENSOR", - ) - - wait_for_checks__fail_org_mozilla_klar_derived__baseline_clients_last_seen__v1 = ExternalTaskSensor( - task_id="wait_for_checks__fail_org_mozilla_klar_derived__baseline_clients_last_seen__v1", - external_dag_id="bqetl_glean_usage", - external_task_id="klar_android.checks__fail_org_mozilla_klar_derived__baseline_clients_last_seen__v1", - execution_delta=datetime.timedelta(seconds=9000), - check_existence=True, - mode="reschedule", - allowed_states=ALLOWED_STATES, - failed_states=FAILED_STATES, - pool="DATA_ENG_EXTERNALTASKSENSOR", - ) - - wait_for_copy_deduplicate_all = ExternalTaskSensor( - task_id="wait_for_copy_deduplicate_all", - external_dag_id="copy_deduplicate", - external_task_id="copy_deduplicate_all", - execution_delta=datetime.timedelta(seconds=12600), - check_existence=True, - 
mode="reschedule", - allowed_states=ALLOWED_STATES, - failed_states=FAILED_STATES, - pool="DATA_ENG_EXTERNALTASKSENSOR", - ) - - wait_for_search_derived__search_clients_daily__v8 = ExternalTaskSensor( - task_id="wait_for_search_derived__search_clients_daily__v8", - external_dag_id="bqetl_search", - external_task_id="search_derived__search_clients_daily__v8", - execution_delta=datetime.timedelta(seconds=5400), - check_existence=True, - mode="reschedule", - allowed_states=ALLOWED_STATES, - failed_states=FAILED_STATES, - pool="DATA_ENG_EXTERNALTASKSENSOR", - ) - search_derived__desktop_search_aggregates_by_userstate__v1 = bigquery_etl_query( task_id="search_derived__desktop_search_aggregates_by_userstate__v1", destination_table="desktop_search_aggregates_by_userstate_v1", @@ -347,69 +167,9 @@ ) search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_checks__fail_org_mozilla_fenix_derived__baseline_clients_last_seen__v1 - ) - - search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_checks__fail_org_mozilla_fenix_nightly_derived__baseline_clients_last_seen__v1 - ) - - search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_checks__fail_org_mozilla_fennec_aurora_derived__baseline_clients_last_seen__v1 + wait_for_search_derived__mobile_search_aggregates__v1 ) search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_checks__fail_org_mozilla_firefox_beta_derived__baseline_clients_last_seen__v1 - ) - - search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_checks__fail_org_mozilla_firefox_derived__baseline_clients_last_seen__v1 - ) - - search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_checks__fail_org_mozilla_focus_beta_derived__baseline_clients_last_seen__v1 - ) - - search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_checks__fail_org_mozilla_focus_derived__baseline_clients_last_seen__v1 - ) - - 
search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_checks__fail_org_mozilla_focus_nightly_derived__baseline_clients_last_seen__v1 - ) - - search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_checks__fail_org_mozilla_ios_fennec_derived__baseline_clients_last_seen__v1 - ) - - search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_checks__fail_org_mozilla_ios_firefox_derived__baseline_clients_last_seen__v1 - ) - - search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_checks__fail_org_mozilla_ios_firefoxbeta_derived__baseline_clients_last_seen__v1 - ) - - search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_checks__fail_org_mozilla_ios_focus_derived__baseline_clients_last_seen__v1 - ) - - search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_checks__fail_org_mozilla_ios_klar_derived__baseline_clients_last_seen__v1 - ) - - search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_checks__fail_org_mozilla_klar_derived__baseline_clients_last_seen__v1 - ) - - search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_copy_deduplicate_all - ) - - search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_search_derived__mobile_search_clients_daily__v1 - ) - - search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_search_derived__search_clients_daily__v8 + wait_for_search_derived__search_aggregates__v8 ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_search.py /tmp/workspace/generated-sql/dags/bqetl_search.py --- /tmp/workspace/main-generated-sql/dags/bqetl_search.py 2024-08-01 22:05:22.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_search.py 2024-08-01 22:05:08.000000000 +0000 @@ -167,13 +167,6 @@ ) ExternalTaskMarker( - task_id="bqetl_search_dashboard__wait_for_search_derived__search_clients_daily__v8", - external_dag_id="bqetl_search_dashboard", - 
external_task_id="wait_for_search_derived__search_clients_daily__v8", - execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=81000)).isoformat() }}", - ) - - ExternalTaskMarker( task_id="bqetl_addons__wait_for_search_derived__search_clients_daily__v8", external_dag_id="bqetl_addons", external_task_id="wait_for_search_derived__search_clients_daily__v8", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql 2024-08-01 21:58:38.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql 2024-08-01 22:02:14.000000000 +0000 @@ -1,468 +1,9 @@ -WITH --- Google Desktop (search + DAU) -desktop_data_google AS ( +WITH combined_data AS ( SELECT submission_date, - IF(LOWER(channel) LIKE '%esr%', 'ESR', 'personal') AS channel, country, - COUNT(DISTINCT client_id) AS dau, - COUNT( - DISTINCT IF(default_search_engine LIKE '%google%', client_id, NULL) - ) AS dau_w_engine_as_default, - COUNT( - DISTINCT IF( - sap > 0 - AND normalized_engine = 'Google' - AND default_search_engine LIKE '%google%', - client_id, - NULL - ) - ) AS dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Google', sap, 0)) AS sap, - SUM(IF(normalized_engine = 'Google', tagged_sap, 0)) AS tagged_sap, - SUM(IF(normalized_engine = 'Google', tagged_follow_on, 0)) AS tagged_follow_on, - SUM(IF(normalized_engine = 'Google', search_with_ads, 0)) AS search_with_ads, - SUM(IF(normalized_engine = 'Google', ad_click, 0)) AS ad_click, - SUM(IF(normalized_engine = 'Google', organic, 0)) AS organic, - SUM(IF(normalized_engine = 'Google', ad_click_organic, 0)) AS ad_click_organic, - SUM(IF(normalized_engine = 
'Google', search_with_ads_organic, 0)) AS search_with_ads_organic, - SUM(IF(normalized_engine = 'Google' AND is_sap_monetizable, sap, 0)) AS monetizable_sap - FROM - `moz-fx-data-shared-prod.search.search_clients_engines_sources_daily` - WHERE - submission_date = @submission_date - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ) - GROUP BY - submission_date, - channel, - country - ORDER BY - submission_date, - channel, - country -), --- Bing Desktop (non-Acer) -desktop_data_bing AS ( - SELECT - submission_date, - country, - COUNT(DISTINCT client_id) AS dau, - COUNT( - DISTINCT IF(default_search_engine LIKE '%bing%', client_id, NULL) - ) AS dau_w_engine_as_default, - COUNT( - DISTINCT IF( - sap > 0 - AND normalized_engine = 'Bing' - AND default_search_engine LIKE '%bing%', - client_id, - NULL - ) - ) AS dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Bing', sap, 0)) AS sap, - SUM(IF(normalized_engine = 'Bing', tagged_sap, 0)) AS tagged_sap, - SUM(IF(normalized_engine = 'Bing', tagged_follow_on, 0)) AS tagged_follow_on, - SUM(IF(normalized_engine = 'Bing', search_with_ads, 0)) AS search_with_ads, - SUM(IF(normalized_engine = 'Bing', ad_click, 0)) AS ad_click, - SUM(IF(normalized_engine = 'Bing', organic, 0)) AS organic, - SUM(IF(normalized_engine = 'Bing', ad_click_organic, 0)) AS ad_click_organic, - SUM(IF(normalized_engine = 'Bing', search_with_ads_organic, 0)) AS search_with_ads_organic, - SUM(IF(normalized_engine = 'Bing' AND is_sap_monetizable, sap, 0)) AS monetizable_sap - FROM - `moz-fx-data-shared-prod.search.search_clients_engines_sources_daily` - WHERE - submission_date = @submission_date - AND (distribution_id IS NULL OR distribution_id NOT LIKE '%acer%') - AND client_id NOT IN (SELECT client_id FROM `moz-fx-data-shared-prod.search.acer_cohort`) - GROUP BY - submission_date, - country - ORDER BY - submission_date, - 
country -), --- DDG Desktop + Extension -desktop_data_ddg AS ( - SELECT - submission_date, - country, - COUNT(DISTINCT client_id) AS dau, - COUNT( - DISTINCT IF( - ( - (default_search_engine LIKE('%ddg%') OR default_search_engine LIKE('%duckduckgo%')) - AND NOT default_search_engine LIKE('%addon%') - ), - client_id, - NULL - ) - ) AS ddg_dau_w_engine_as_default, - COUNT( - DISTINCT IF( - (engine) IN ('ddg', 'duckduckgo') - AND sap > 0 - AND ( - (default_search_engine LIKE('%ddg%') OR default_search_engine LIKE('%duckduckgo%')) - AND NOT default_search_engine LIKE('%addon%') - ), - client_id, - NULL - ) - ) AS ddg_dau_engaged_w_sap, - SUM(IF(engine IN ('ddg', 'duckduckgo'), sap, 0)) AS ddg_sap, - SUM(IF(engine IN ('ddg', 'duckduckgo'), tagged_sap, 0)) AS ddg_tagged_sap, - SUM(IF(engine IN ('ddg', 'duckduckgo'), tagged_follow_on, 0)) AS ddg_tagged_follow_on, - SUM(IF(engine IN ('ddg', 'duckduckgo'), search_with_ads, 0)) AS ddg_search_with_ads, - SUM(IF(engine IN ('ddg', 'duckduckgo'), ad_click, 0)) AS ddg_ad_click, - SUM(IF(engine IN ('ddg', 'duckduckgo'), organic, 0)) AS ddg_organic, - SUM(IF(engine IN ('ddg', 'duckduckgo'), ad_click_organic, 0)) AS ddg_ad_click_organic, - SUM( - IF(engine IN ('ddg', 'duckduckgo'), search_with_ads_organic, 0) - ) AS ddg_search_with_ads_organic, - SUM(IF(engine IN ('ddg', 'duckduckgo') AND is_sap_monetizable, sap, 0)) AS ddg_monetizable_sap, - -- in-content probes not available for addon so these metrics although being here will be zero - COUNT( - DISTINCT IF(default_search_engine LIKE('ddg%addon'), client_id, NULL) - ) AS ddgaddon_dau_w_engine_as_default, - COUNT( - DISTINCT IF( - engine = 'ddg-addon' - AND sap > 0 - AND default_search_engine LIKE('ddg%addon'), - client_id, - NULL - ) - ) AS ddgaddon_dau_engaged_w_sap, - SUM(IF(engine IN ('ddg-addon'), sap, 0)) AS ddgaddon_sap, - SUM(IF(engine IN ('ddg-addon'), tagged_sap, 0)) AS ddgaddon_tagged_sap, - SUM(IF(engine IN ('ddg-addon'), tagged_follow_on, 0)) AS 
ddgaddon_tagged_follow_on, - SUM(IF(engine IN ('ddg-addon'), search_with_ads, 0)) AS ddgaddon_search_with_ads, - SUM(IF(engine IN ('ddg-addon'), ad_click, 0)) AS ddgaddon_ad_click, - SUM(IF(engine IN ('ddg-addon'), organic, 0)) AS ddgaddon_organic, - SUM(IF(engine IN ('ddg-addon'), ad_click_organic, 0)) AS ddgaddon_ad_click_organic, - SUM( - IF(engine IN ('ddg-addon'), search_with_ads_organic, 0) - ) AS ddgaddon_search_with_ads_organic, - SUM(IF(engine IN ('ddg-addon') AND is_sap_monetizable, sap, 0)) AS ddgaddon_monetizable_sap - FROM - `moz-fx-data-shared-prod.search.search_clients_engines_sources_daily` - WHERE - submission_date = @submission_date - GROUP BY - submission_date, - country - ORDER BY - submission_date, - country -), --- Mobile DAU data -- merging baseline clients to AUA clients -## baseline ping -- mobile default search engine by client id -mobile_baseline_engine AS ( - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine_code AS default_search_engine, - FROM - `moz-fx-data-shared-prod.fenix.baseline` - WHERE - DATE(submission_timestamp) = @submission_date - UNION ALL - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine - FROM - `moz-fx-data-shared-prod.firefox_ios.baseline` - WHERE - DATE(submission_timestamp) = @submission_date - UNION ALL - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.browser_default_search_engine AS default_search_engine - FROM - `moz-fx-data-shared-prod.focus_android.baseline` - WHERE - DATE(submission_timestamp) = @submission_date - UNION ALL - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine - FROM - `moz-fx-data-shared-prod.focus_ios.baseline` - WHERE - DATE(submission_timestamp) = 
@submission_date -), -## baseline ping search counts -- mobile search counts by client id -mobile_baseline_search AS ( - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine_code AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - `moz-fx-data-shared-prod.fenix.baseline`, - UNNEST(metrics.labeled_counter.metrics_search_count) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 - UNION ALL - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - `moz-fx-data-shared-prod.firefox_ios.baseline`, - UNNEST(metrics.labeled_counter.search_counts) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 - UNION ALL - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.browser_default_search_engine AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - `moz-fx-data-shared-prod.focus_android.baseline`, - UNNEST(metrics.labeled_counter.browser_search_search_count) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 - UNION ALL - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - `moz-fx-data-shared-prod.focus_ios.baseline`, - 
UNNEST(metrics.labeled_counter.browser_search_search_count) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 -), -## baseline-powered clients who qualify for KPI (activity filters applied) -mobile_dau_data AS ( - SELECT DISTINCT - submission_date, - "mobile" AS device, - country, - client_id - FROM - `mozdata.telemetry.mobile_active_users` - WHERE - submission_date = @submission_date - AND is_dau - # not including Fenix MozillaOnline, BrowserStack, Klar - AND app_name IN ("Focus iOS", "Firefox iOS", "Fenix", "Focus Android") -), -final_mobile_dau_counts AS ( - SELECT - submission_date, - country, - COUNT(DISTINCT client_id) AS eligible_dau, - COUNT( - DISTINCT IF( - ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ), - client_id, - NULL - ) - ) AS google_eligible_dau, - COUNT( - DISTINCT IF( - default_search_engine LIKE '%google%' - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ), - client_id, - NULL - ) - ) AS google_dau_w_engine_as_default, - COUNT( - DISTINCT IF( - search_count > 0 - AND normalized_engine = 'Google' - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ), - client_id, - NULL - ) - ) AS google_dau_engaged_w_sap, - COUNT( - DISTINCT IF(default_search_engine LIKE '%bing%', client_id, NULL) - ) AS bing_dau_w_engine_as_default, - COUNT( - DISTINCT IF(search_count > 0 AND normalized_engine = 'Bing', client_id, NULL) - ) AS bing_dau_engaged_w_sap, - COUNT( - DISTINCT IF( - default_search_engine LIKE('%ddg%') - OR default_search_engine LIKE('%duckduckgo%'), - client_id, - NULL - ) - ) AS ddg_dau_w_engine_as_default, 
- COUNT( - DISTINCT IF(normalized_engine = "DuckDuckGo" AND search_count > 0, client_id, NULL) - ) AS ddg_dau_engaged_w_sap - FROM - mobile_dau_data - LEFT JOIN - mobile_baseline_engine - USING (submission_date, client_id) - LEFT JOIN - mobile_baseline_search - USING (submission_date, client_id, default_search_engine) - GROUP BY - submission_date, - country -), --- Google Mobile (search only - as mobile search metrics is based on metrics --- ping, while DAU should be based on main ping on Mobile, see also --- https://mozilla-hub.atlassian.net/browse/RS-575) -mobile_data_google AS ( - SELECT - submission_date, - country, - google_eligible_dau, - google_dau_w_engine_as_default, - google_dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Google', sap, 0)) AS sap, - SUM(IF(normalized_engine = 'Google', tagged_sap, 0)) AS tagged_sap, - SUM(IF(normalized_engine = 'Google', tagged_follow_on, 0)) AS tagged_follow_on, - SUM(IF(normalized_engine = 'Google', search_with_ads, 0)) AS search_with_ads, - SUM(IF(normalized_engine = 'Google', ad_click, 0)) AS ad_click, - SUM(IF(normalized_engine = 'Google', organic, 0)) AS organic, - SUM(IF(normalized_engine = 'Google', ad_click_organic, 0)) AS ad_click_organic, - -- metrics do not exist for mobile - 0 AS search_with_ads_organic, - 0 AS monetizable_sap - FROM - `moz-fx-data-shared-prod.search.mobile_search_clients_engines_sources_daily` - INNER JOIN - final_mobile_dau_counts - USING (submission_date, country) - WHERE - submission_date = @submission_date - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ) - AND ( - app_name IN ('Fenix', 'Firefox Preview', 'Focus', 'Focus Android Glean', 'Focus iOS Glean') - OR (app_name = 'Fennec' AND os = 'iOS') - ) - GROUP BY - submission_date, - country, - google_eligible_dau, - google_dau_w_engine_as_default, - google_dau_engaged_w_sap - ORDER BY - submission_date, 
- country -), --- Bing & DDG Mobile (search only - as mobile search metrics is based on --- metrics ping, while DAU should be based on main ping on Mobile, see also --- https://mozilla-hub.atlassian.net/browse/RS-575) -mobile_data_bing_ddg AS ( - SELECT - submission_date, - country, - eligible_dau, - bing_dau_w_engine_as_default, - bing_dau_engaged_w_sap, - ddg_dau_w_engine_as_default, - ddg_dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Bing', sap, 0)) AS bing_sap, - SUM(IF(normalized_engine = 'Bing', tagged_sap, 0)) AS bing_tagged_sap, - SUM(IF(normalized_engine = 'Bing', tagged_follow_on, 0)) AS bing_tagged_follow_on, - SUM(IF(normalized_engine = 'Bing', search_with_ads, 0)) AS bing_search_with_ads, - SUM(IF(normalized_engine = 'Bing', ad_click, 0)) AS bing_ad_click, - SUM(IF(normalized_engine = 'Bing', organic, 0)) AS bing_organic, - SUM(IF(normalized_engine = 'Bing', ad_click_organic, 0)) AS bing_ad_click_organic, - -- metrics do not exist for mobile - 0 AS bing_search_with_ads_organic, - 0 AS bing_monetizable_sap, - SUM(IF(normalized_engine = 'DuckDuckGo', sap, 0)) AS ddg_sap, - SUM(IF(normalized_engine = 'DuckDuckGo', tagged_sap, 0)) AS ddg_tagged_sap, - SUM(IF(normalized_engine = 'DuckDuckGo', tagged_follow_on, 0)) AS ddg_tagged_follow_on, - SUM(IF(normalized_engine = 'DuckDuckGo', search_with_ads, 0)) AS ddg_search_with_ads, - SUM(IF(normalized_engine = 'DuckDuckGo', ad_click, 0)) AS ddg_ad_click, - SUM(IF(normalized_engine = 'DuckDuckGo', organic, 0)) AS ddg_organic, - SUM(IF(normalized_engine = 'DuckDuckGo', ad_click_organic, 0)) AS ddg_ad_click_organic, - -- metrics do not exist for mobile - 0 AS ddg_search_with_ads_organic, - 0 AS ddg_monetizable_sap - FROM - `moz-fx-data-shared-prod.search.mobile_search_clients_engines_sources_daily` - INNER JOIN - final_mobile_dau_counts - USING (submission_date, country) - WHERE - submission_date = @submission_date - AND ( - app_name IN ('Fenix', 'Firefox Preview', 'Focus', 'Focus Android Glean', 'Focus iOS 
Glean') - OR (app_name = 'Fennec' AND os = 'iOS') - ) - GROUP BY - submission_date, - country, - eligible_dau, - bing_dau_w_engine_as_default, - bing_dau_engaged_w_sap, - ddg_dau_w_engine_as_default, - ddg_dau_engaged_w_sap - ORDER BY - submission_date, - country -) --- combine all desktop and mobile together -SELECT - submission_date, - 'Google' AS partner, - 'desktop' AS device, - channel, - country, - dau, - dau_engaged_w_sap, - sap, - tagged_sap, - tagged_follow_on, - search_with_ads, - ad_click, - organic, - ad_click_organic, - search_with_ads_organic, - monetizable_sap, - dau_w_engine_as_default -FROM - desktop_data_google -UNION ALL -SELECT - submission_date, - 'Bing' AS partner, + normalized_engine AS partner, 'desktop' AS device, - NULL AS channel, - country, - dau, - dau_engaged_w_sap, sap, tagged_sap, tagged_follow_on, @@ -470,62 +11,18 @@ ad_click, organic, ad_click_organic, - search_with_ads_organic, - monetizable_sap, - dau_w_engine_as_default -FROM - desktop_data_bing -UNION ALL -SELECT - submission_date, - 'DuckDuckGo' AS partner, - 'desktop' AS device, - NULL AS channel, - country, - dau, - ddg_dau_engaged_w_sap AS dau_engaged_w_sap, - ddg_sap AS sap, - ddg_tagged_sap AS tagged_sap, - ddg_tagged_follow_on AS tagged_follow_on, - ddg_search_with_ads AS search_with_ads, - ddg_ad_click AS ad_click, - ddg_organic AS organic, - ddg_ad_click_organic AS ad_click_organic, - ddg_search_with_ads_organic AS search_with_ads_organic, - ddg_monetizable_sap AS monetizable_sap, - ddg_dau_w_engine_as_default AS dau_w_engine_as_default -FROM - desktop_data_ddg -UNION ALL -SELECT + 0 AS search_with_ads_organic, + 0 AS monetizable_sap + FROM + `moz-fx-data-shared-prod.search.search_aggregates` + WHERE + submission_date = @submission_date + UNION ALL + SELECT submission_date, - 'DuckDuckGo' AS partner, - 'extension' AS device, - NULL AS channel, country, - dau, - ddgaddon_dau_engaged_w_sap AS dau_engaged_w_sap, - ddgaddon_sap AS sap, - ddgaddon_tagged_sap AS tagged_sap, 
- ddgaddon_tagged_follow_on AS tagged_follow_on, - ddgaddon_search_with_ads AS search_with_ads, - ddgaddon_ad_click AS ad_click, - ddgaddon_organic AS organic, - ddgaddon_ad_click_organic AS ad_click_organic, - ddgaddon_search_with_ads_organic AS search_with_ads_organic, - ddgaddon_monetizable_sap AS monetizable_sap, - ddgaddon_dau_w_engine_as_default AS dau_w_engine_as_default -FROM - desktop_data_ddg -UNION ALL -SELECT - submission_date, - 'Google' AS partner, + normalized_engine AS partner, 'mobile' AS device, - 'n/a' AS channel, - country, - google_eligible_dau AS dau, - google_dau_engaged_w_sap AS dau_engaged_w_sap, sap, tagged_sap, tagged_follow_on, @@ -533,63 +30,50 @@ ad_click, organic, ad_click_organic, - search_with_ads_organic, - monetizable_sap, - # custom engine bug merged in v121 - # null engine bug merged in v126 - # remove default engine data prior to June 2024 - IF( - submission_date >= "2024-06-01", - google_dau_w_engine_as_default, - NULL - ) AS dau_w_engine_as_default -FROM - mobile_data_google -UNION ALL -SELECT + 0 AS search_with_ads_organic, + 0 AS monetizable_sap + FROM + `moz-fx-data-shared-prod.search.mobile_search_aggregates` + WHERE + submission_date = @submission_date +), +desktop_mobile_dau AS ( + SELECT submission_date, - 'Bing' AS partner, - 'mobile' AS device, - NULL AS channel, + partner, + device, country, - eligible_dau, - bing_dau_engaged_w_sap, - bing_sap, - bing_tagged_sap, - bing_tagged_follow_on, - bing_search_with_ads, - bing_ad_click, - bing_organic, - bing_ad_click_organic, - bing_search_with_ads_organic, - bing_monetizable_sap, - # custom engine bug merged in v121 - # null engine bug merged in v126 - # remove default engine data prior to June 2024 - IF(submission_date >= "2024-06-01", bing_dau_w_engine_as_default, NULL) AS dau_w_engine_as_default -FROM - mobile_data_bing_ddg -UNION ALL + dau_eligible_markets, + dau_w_engine_as_default, + dau_engaged_w_sap + FROM + `mozdata.analysis.search_dau_aggregates_ak` + WHERE + 
submission_date = @submission_date +) SELECT - submission_date, - 'DuckDuckGo' AS partner, - 'mobile' AS device, + cd.submission_date, + cd.partner, + cd.device, NULL AS channel, - country, - eligible_dau, - ddg_dau_engaged_w_sap, - ddg_sap, - ddg_tagged_sap, - ddg_tagged_follow_on, - ddg_search_with_ads, - ddg_ad_click, - ddg_organic, - ddg_ad_click_organic, - ddg_search_with_ads_organic, - ddg_monetizable_sap, - # custom engine bug merged in v121 - # null engine bug merged in v126 - # remove default engine data prior to June 2024 - IF(submission_date >= "2024-06-01", ddg_dau_w_engine_as_default, NULL) AS dau_w_engine_as_default -FROM - mobile_data_bing_ddg + cd.country, + du.dau_eligible_markets AS dau, + du.dau_w_engine_as_default, + du.dau_engaged_w_sap, + cd.sap, + cd.tagged_sap, + cd.tagged_follow_on, + cd.search_with_ads, + cd.ad_click, + cd.organic, + cd.ad_click_organic, + cd.search_with_ads_organic, + cd.monetizable_sap +FROM + combined_data cd +LEFT JOIN + desktop_mobile_dau du + ON cd.partner = du.partner + AND cd.submission_date = du.submission_date + AND cd.country = du.country + AND cd.device = du.device; ```

Link to full diff

dataops-ci-bot commented 2 months ago

Integration report for "Pull DAU from two separate tables"

sql.diff

Click to expand! ```diff diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py --- /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py 2024-08-06 02:11:46.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py 2024-08-06 02:29:04.000000000 +0000 @@ -324,6 +324,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_google_analytics_derived_ga4__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", external_dag_id="bqetl_google_analytics_derived_ga4", external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", @@ -331,6 +338,13 @@ ) ExternalTaskMarker( + task_id="bqetl_search__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_search", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=82800)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_main_summary__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", external_dag_id="bqetl_main_summary", external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", @@ -344,6 +358,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_engagement_model__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_desktop_engagement_model", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + execution_date="{{ (execution_date - 
macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_desktop_retention_model__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", external_dag_id="bqetl_desktop_retention_model", external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", @@ -460,32 +481,6 @@ parameters=["submission_date:DATE:{{ds}}"], ) - with TaskGroup( - "clients_first_seen_v3_external", - ) as clients_first_seen_v3_external: - ExternalTaskMarker( - task_id="bqetl_desktop_conv_evnt_categorization__wait_for_clients_first_seen_v3", - external_dag_id="bqetl_desktop_conv_evnt_categorization", - external_task_id="wait_for_clients_first_seen_v3", - execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", - ) - - ExternalTaskMarker( - task_id="bqetl_search__wait_for_clients_first_seen_v3", - external_dag_id="bqetl_search", - external_task_id="wait_for_clients_first_seen_v3", - execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=82800)).isoformat() }}", - ) - - ExternalTaskMarker( - task_id="bqetl_desktop_engagement_model__wait_for_clients_first_seen_v3", - external_dag_id="bqetl_desktop_engagement_model", - external_task_id="wait_for_clients_first_seen_v3", - execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", - ) - - clients_first_seen_v3_external.set_upstream(clients_first_seen_v3) - fenix_derived__funnel_retention_clients_week_2__v1 = bigquery_etl_query( task_id="fenix_derived__funnel_retention_clients_week_2__v1", destination_table="funnel_retention_clients_week_2_v1", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py --- /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 2024-08-06 02:11:46.000000000 +0000 +++ 
/tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 2024-08-06 02:29:06.000000000 +0000 @@ -50,10 +50,11 @@ tags=tags, ) as dag: - wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 = ExternalTaskSensor( - task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", - external_dag_id="bqetl_main_summary", - external_task_id="checks__fail_telemetry_derived__clients_last_seen__v2", + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 = ( + ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_analytics_tables", + external_task_id="checks__fail_telemetry_derived__clients_first_seen__v2", execution_delta=datetime.timedelta(seconds=36000), check_existence=True, mode="reschedule", @@ -61,11 +62,12 @@ failed_states=FAILED_STATES, pool="DATA_ENG_EXTERNALTASKSENSOR", ) + ) - wait_for_clients_first_seen_v3 = ExternalTaskSensor( - task_id="wait_for_clients_first_seen_v3", - external_dag_id="bqetl_analytics_tables", - external_task_id="clients_first_seen_v3", + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 = ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_main_summary", + external_task_id="checks__fail_telemetry_derived__clients_last_seen__v2", execution_delta=datetime.timedelta(seconds=36000), check_existence=True, mode="reschedule", @@ -131,11 +133,11 @@ ) google_ads_derived__conversion_event_categorization__v1.set_upstream( - wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 ) google_ads_derived__conversion_event_categorization__v1.set_upstream( - wait_for_clients_first_seen_v3 + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 ) google_ads_derived__conversion_event_categorization__v1.set_upstream( diff -bur --no-dereference --new-file 
/tmp/workspace/main-generated-sql/dags/bqetl_desktop_engagement_model.py /tmp/workspace/generated-sql/dags/bqetl_desktop_engagement_model.py --- /tmp/workspace/main-generated-sql/dags/bqetl_desktop_engagement_model.py 2024-08-06 02:11:46.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_desktop_engagement_model.py 2024-08-06 02:29:06.000000000 +0000 @@ -50,10 +50,11 @@ tags=tags, ) as dag: - wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 = ExternalTaskSensor( - task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", - external_dag_id="bqetl_main_summary", - external_task_id="checks__fail_telemetry_derived__clients_last_seen__v2", + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 = ( + ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_analytics_tables", + external_task_id="checks__fail_telemetry_derived__clients_first_seen__v2", execution_delta=datetime.timedelta(seconds=36000), check_existence=True, mode="reschedule", @@ -61,11 +62,12 @@ failed_states=FAILED_STATES, pool="DATA_ENG_EXTERNALTASKSENSOR", ) + ) - wait_for_clients_first_seen_v3 = ExternalTaskSensor( - task_id="wait_for_clients_first_seen_v3", - external_dag_id="bqetl_analytics_tables", - external_task_id="clients_first_seen_v3", + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 = ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_main_summary", + external_task_id="checks__fail_telemetry_derived__clients_last_seen__v2", execution_delta=datetime.timedelta(seconds=36000), check_existence=True, mode="reschedule", @@ -101,9 +103,9 @@ ) telemetry_derived__desktop_engagement_clients__v1.set_upstream( - wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 ) telemetry_derived__desktop_engagement_clients__v1.set_upstream( - 
wait_for_clients_first_seen_v3 + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_generated_funnels.py /tmp/workspace/generated-sql/dags/bqetl_generated_funnels.py --- /tmp/workspace/main-generated-sql/dags/bqetl_generated_funnels.py 2024-08-06 02:11:46.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_generated_funnels.py 2024-08-06 02:29:05.000000000 +0000 @@ -288,6 +288,21 @@ ) ) + monitor_frontend_derived__monitor_dashboard_user_journey_funnels__v1 = bigquery_etl_query( + task_id="monitor_frontend_derived__monitor_dashboard_user_journey_funnels__v1", + destination_table="monitor_dashboard_user_journey_funnels_v1", + dataset_id="monitor_frontend_derived", + project_id="moz-fx-data-shared-prod", + owner="ksiegler@mozilla.org", + email=[ + "ascholtz@mozilla.com", + "ksiegler@mozilla.org", + "telemetry-alerts@mozilla.com", + ], + date_partition_parameter="submission_date", + depends_on_past=False, + ) + accounts_frontend_derived__email_first_reg_login_funnels_by_service__v1.set_upstream( wait_for_copy_deduplicate_all ) @@ -349,3 +364,7 @@ firefox_accounts_derived__registration_funnels_legacy_events__v1.set_upstream( wait_for_firefox_accounts_derived__fxa_stdout_events__v1 ) + + monitor_frontend_derived__monitor_dashboard_user_journey_funnels__v1.set_upstream( + wait_for_copy_deduplicate_all + ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_mobile_search.py /tmp/workspace/generated-sql/dags/bqetl_mobile_search.py --- /tmp/workspace/main-generated-sql/dags/bqetl_mobile_search.py 2024-08-06 02:11:46.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_mobile_search.py 2024-08-06 02:29:02.000000000 +0000 @@ -80,6 +80,20 @@ depends_on_past=False, ) + with TaskGroup( + "search_derived__mobile_search_aggregates__v1_external", + ) as search_derived__mobile_search_aggregates__v1_external: + ExternalTaskMarker( + 
task_id="bqetl_search_dashboard__wait_for_search_derived__mobile_search_aggregates__v1", + external_dag_id="bqetl_search_dashboard", + external_task_id="wait_for_search_derived__mobile_search_aggregates__v1", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=77400)).isoformat() }}", + ) + + search_derived__mobile_search_aggregates__v1_external.set_upstream( + search_derived__mobile_search_aggregates__v1 + ) + search_derived__mobile_search_clients_daily__v1 = bigquery_etl_query( task_id="search_derived__mobile_search_clients_daily__v1", destination_table="mobile_search_clients_daily_v1", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_monitoring.py /tmp/workspace/generated-sql/dags/bqetl_monitoring.py --- /tmp/workspace/main-generated-sql/dags/bqetl_monitoring.py 2024-08-06 02:11:46.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_monitoring.py 2024-08-06 02:29:03.000000000 +0000 @@ -295,17 +295,6 @@ email=["amiyaguchi@mozilla.com", "ascholtz@mozilla.com"], ) - monitoring_derived__table_partition_expirations__v1 = bigquery_etl_query( - task_id="monitoring_derived__table_partition_expirations__v1", - destination_table="table_partition_expirations_v1", - dataset_id="monitoring_derived", - project_id="moz-fx-data-shared-prod", - owner="bewu@mozilla.cam", - email=["ascholtz@mozilla.com", "bewu@mozilla.cam"], - date_partition_parameter="submission_date", - depends_on_past=True, - ) - monitoring_derived__telemetry_missing_columns__v3 = bigquery_etl_query( task_id="monitoring_derived__telemetry_missing_columns__v3", destination_table="telemetry_missing_columns_v3", @@ -361,10 +350,6 @@ wait_for_copy_deduplicate_all ) - monitoring_derived__table_partition_expirations__v1.set_upstream( - wait_for_copy_deduplicate_all - ) - monitoring_derived__telemetry_missing_columns__v3.set_upstream( wait_for_copy_deduplicate_all ) diff -bur --no-dereference --new-file 
/tmp/workspace/main-generated-sql/dags/bqetl_search_dashboard.py /tmp/workspace/generated-sql/dags/bqetl_search_dashboard.py --- /tmp/workspace/main-generated-sql/dags/bqetl_search_dashboard.py 2024-08-06 02:11:46.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_search_dashboard.py 2024-08-06 02:29:02.000000000 +0000 @@ -251,23 +251,11 @@ pool="DATA_ENG_EXTERNALTASKSENSOR", ) - wait_for_copy_deduplicate_all = ExternalTaskSensor( - task_id="wait_for_copy_deduplicate_all", - external_dag_id="copy_deduplicate", - external_task_id="copy_deduplicate_all", - execution_delta=datetime.timedelta(seconds=12600), - check_existence=True, - mode="reschedule", - allowed_states=ALLOWED_STATES, - failed_states=FAILED_STATES, - pool="DATA_ENG_EXTERNALTASKSENSOR", - ) - - wait_for_search_derived__search_clients_daily__v8 = ExternalTaskSensor( - task_id="wait_for_search_derived__search_clients_daily__v8", - external_dag_id="bqetl_search", - external_task_id="search_derived__search_clients_daily__v8", - execution_delta=datetime.timedelta(seconds=5400), + wait_for_search_derived__mobile_search_aggregates__v1 = ExternalTaskSensor( + task_id="wait_for_search_derived__mobile_search_aggregates__v1", + external_dag_id="bqetl_mobile_search", + external_task_id="search_derived__mobile_search_aggregates__v1", + execution_delta=datetime.timedelta(seconds=9000), check_existence=True, mode="reschedule", allowed_states=ALLOWED_STATES, @@ -403,13 +391,13 @@ ) search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_copy_deduplicate_all + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 ) search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_search_derived__mobile_search_clients_daily__v1 + wait_for_search_derived__mobile_search_aggregates__v1 ) search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_search_derived__search_clients_daily__v8 + wait_for_search_derived__search_aggregates__v8 ) diff -bur --no-dereference 
--new-file /tmp/workspace/main-generated-sql/dags/bqetl_search.py /tmp/workspace/generated-sql/dags/bqetl_search.py --- /tmp/workspace/main-generated-sql/dags/bqetl_search.py 2024-08-06 02:11:46.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_search.py 2024-08-06 02:29:02.000000000 +0000 @@ -76,10 +76,11 @@ pool="DATA_ENG_EXTERNALTASKSENSOR", ) - wait_for_clients_first_seen_v3 = ExternalTaskSensor( - task_id="wait_for_clients_first_seen_v3", + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 = ( + ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", external_dag_id="bqetl_analytics_tables", - external_task_id="clients_first_seen_v3", + external_task_id="checks__fail_telemetry_derived__clients_first_seen__v2", execution_delta=datetime.timedelta(seconds=3600), check_existence=True, mode="reschedule", @@ -87,6 +88,7 @@ failed_states=FAILED_STATES, pool="DATA_ENG_EXTERNALTASKSENSOR", ) + ) search_derived__search_aggregates__v8 = bigquery_etl_query( task_id="search_derived__search_aggregates__v8", @@ -165,13 +167,6 @@ ) ExternalTaskMarker( - task_id="bqetl_search_dashboard__wait_for_search_derived__search_clients_daily__v8", - external_dag_id="bqetl_search_dashboard", - external_task_id="wait_for_search_derived__search_clients_daily__v8", - execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=81000)).isoformat() }}", - ) - - ExternalTaskMarker( task_id="bqetl_addons__wait_for_search_derived__search_clients_daily__v8", external_dag_id="bqetl_addons", external_task_id="wait_for_search_derived__search_clients_daily__v8", @@ -298,7 +293,7 @@ ) search_derived__search_clients_last_seen__v2.set_upstream( - wait_for_clients_first_seen_v3 + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 ) search_derived__search_clients_last_seen__v2.set_upstream( Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/ads: ppa_measurements_limited Only in 
/tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitoring_derived: table_partition_expirations_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-cjms-nonprod-9a36/cjms_bigquery/flows_live/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-cjms-nonprod-9a36/cjms_bigquery/flows_live/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-cjms-nonprod-9a36/cjms_bigquery/flows_live/metadata.yaml 2024-08-06 02:06:09.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-cjms-nonprod-9a36/cjms_bigquery/flows_live/metadata.yaml 2024-08-06 02:22:40.000000000 +0000 @@ -2,6 +2,6 @@ # Generated by bigquery_etl.dependency references: view.sql: - - moz-fx-fxa-nonprod-375e.fxa_stage_logs.stdout + - moz-fx-fxa-nonprod-375e.fxa_stage_logs.stdout_20* - moz-fx-fxa-nonprod.gke_fxa_stage_log.stderr - moz-fx-fxa-nonprod.gke_fxa_stage_log.stdout diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-cjms-nonprod-9a36/cjms_bigquery/flows_live/view.sql /tmp/workspace/generated-sql/sql/moz-fx-cjms-nonprod-9a36/cjms_bigquery/flows_live/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-cjms-nonprod-9a36/cjms_bigquery/flows_live/view.sql 2024-08-06 02:05:22.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-cjms-nonprod-9a36/cjms_bigquery/flows_live/view.sql 2024-08-06 02:06:16.000000000 +0000 @@ -28,12 +28,12 @@ AND jsonPayload.fields.event_type IS NOT NULL UNION ALL SELECT - DATE(`timestamp`) AS submission_date, + PARSE_DATE('%y%m%d', _TABLE_SUFFIX) AS submission_date, JSON_VALUE(jsonPayload.fields.user_properties, '$.flow_id') AS flow_id, `timestamp`, TO_HEX(SHA256(jsonPayload.fields.user_id)) AS fxa_uid, FROM - `moz-fx-fxa-nonprod-375e.fxa_stage_logs.stdout` + `moz-fx-fxa-nonprod-375e.fxa_stage_logs.stdout_20*` WHERE jsonPayload.type = 'amplitudeEvent' AND jsonPayload.fields.event_type IS NOT NULL diff -bur --no-dereference --new-file 
/tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/baseline_clients_daily_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/baseline_clients_daily_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/baseline_clients_daily_v1/schema.yaml 2024-08-06 02:06:11.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/baseline_clients_daily_v1/schema.yaml 2024-08-06 02:08:08.000000000 +0000 @@ -42,9 +42,6 @@ name: country type: STRING - mode: NULLABLE - name: isp - type: STRING -- mode: NULLABLE name: app_build type: STRING - mode: NULLABLE @@ -72,6 +69,9 @@ name: is_new_profile type: BOOLEAN - mode: NULLABLE + name: isp + type: STRING +- mode: NULLABLE name: distribution_id type: STRING - mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/baseline_clients_last_seen_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/baseline_clients_last_seen_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/baseline_clients_last_seen_v1/schema.yaml 2024-08-06 02:06:11.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/accounts_cirrus_derived/baseline_clients_last_seen_v1/schema.yaml 2024-08-06 02:08:26.000000000 +0000 @@ -33,9 +33,6 @@ type: STRING - name: country type: STRING -- mode: NULLABLE - name: isp - type: STRING - name: app_build type: STRING - name: app_channel @@ -54,6 +51,8 @@ type: DATE - name: is_new_profile type: BOOLEAN +- name: isp + type: STRING - mode: NULLABLE name: distribution_id type: STRING diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/ads/ppa_measurements/metadata.yaml 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/ads/ppa_measurements/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/ads/ppa_measurements/metadata.yaml 2024-08-06 02:06:25.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/ads/ppa_measurements/metadata.yaml 2024-08-06 02:22:43.000000000 +0000 @@ -5,6 +5,9 @@ This data lives in the moz-fx-ads-nonprod project and is generated by the PPA DAP Collector job (dap_collector_ppa_dev DAG, defined in telemetry-airflow and docker-etl) + + Temporarily pointed at the dev instance of this data; will point to prod once + that exists owners: - cmorales@mozilla.com workgroup_access: diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/ads/ppa_measurements_limited/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/ads/ppa_measurements_limited/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/ads/ppa_measurements_limited/metadata.yaml 2024-08-06 02:06:25.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/ads/ppa_measurements_limited/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 @@ -1,23 +0,0 @@ -friendly_name: PPA Measurements -description: |- - Aggregated conversion data from PPA (Privacy-Preserving Attribution) - - Strips sensitive fields advertiser_name, advertiser_id, and campaign_id to - produce a version of this view that is safe to expose to mozilla-confidential - - This data lives in the moz-fx-ads-nonprod project and is generated by the - PPA DAP Collector job (dap_collector_ppa_dev DAG, defined in - telemetry-airflow and docker-etl) -owners: - - cmorales@mozilla.com -labels: - authorized: true -workgroup_access: - - role: roles/bigquery.dataViewer - members: - - workgroup:mozilla-confidential - -# Generated by bigquery_etl.dependency -references: - view.sql: - - moz-fx-ads-prod.ppa.measurements diff -bur --no-dereference --new-file 
/tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/ads/ppa_measurements_limited/view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/ads/ppa_measurements_limited/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/ads/ppa_measurements_limited/view.sql 2024-08-06 02:05:22.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/ads/ppa_measurements_limited/view.sql 1970-01-01 00:00:00.000000000 +0000 @@ -1,14 +0,0 @@ -CREATE OR REPLACE VIEW - `moz-fx-data-shared-prod.ads.ppa_measurements_limited` -AS -SELECT - collection_time, - placement_id, - ad_id, - conversion_key, - task_size, - task_id, - task_index, - conversion_count, -FROM - `moz-fx-ads-prod.ppa.measurements` diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/baseline_clients_daily_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/baseline_clients_daily_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/baseline_clients_daily_v1/schema.yaml 2024-08-06 02:06:11.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/baseline_clients_daily_v1/schema.yaml 2024-08-06 02:08:09.000000000 +0000 @@ -42,9 +42,6 @@ name: country type: STRING - mode: NULLABLE - name: isp - type: STRING -- mode: NULLABLE name: app_build type: STRING - mode: NULLABLE @@ -72,6 +69,9 @@ name: is_new_profile type: BOOLEAN - mode: NULLABLE + name: isp + type: STRING +- mode: NULLABLE name: distribution_id type: STRING - mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/baseline_clients_last_seen_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/baseline_clients_last_seen_v1/schema.yaml --- 
/tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/baseline_clients_last_seen_v1/schema.yaml 2024-08-06 02:06:11.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/burnham_derived/baseline_clients_last_seen_v1/schema.yaml 2024-08-06 02:08:26.000000000 +0000 @@ -33,9 +33,6 @@ type: STRING - name: country type: STRING -- mode: NULLABLE - name: isp - type: STRING - name: app_build type: STRING - name: app_channel @@ -54,6 +51,8 @@ type: DATE - name: is_new_profile type: BOOLEAN +- name: isp + type: STRING - mode: NULLABLE name: distribution_id type: STRING diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml 2024-08-06 02:05:21.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml 2024-08-06 02:14:16.000000000 +0000 @@ -1,49 +1,49 @@ fields: -- mode: NULLABLE - name: submission_date +- name: submission_date type: DATE -- mode: NULLABLE - name: source + mode: NULLABLE +- name: source type: STRING -- mode: NULLABLE - name: event_type + mode: NULLABLE +- name: event_type type: STRING -- mode: NULLABLE - name: form_factor + mode: NULLABLE +- name: form_factor type: STRING -- mode: NULLABLE - name: country + mode: NULLABLE +- name: country type: STRING -- mode: NULLABLE - name: subdivision1 + mode: NULLABLE +- name: subdivision1 type: STRING -- mode: NULLABLE - name: advertiser + mode: NULLABLE +- name: advertiser type: STRING -- mode: NULLABLE - name: release_channel + mode: NULLABLE +- name: release_channel type: STRING -- mode: NULLABLE - name: position + mode: NULLABLE +- name: position type: INTEGER -- mode: NULLABLE - name: provider 
+ mode: NULLABLE +- name: provider type: STRING -- mode: NULLABLE - name: match_type + mode: NULLABLE +- name: match_type type: STRING -- mode: NULLABLE - name: normalized_os + mode: NULLABLE +- name: normalized_os type: STRING -- mode: NULLABLE - name: suggest_data_sharing_enabled + mode: NULLABLE +- name: suggest_data_sharing_enabled type: BOOLEAN -- mode: NULLABLE - name: event_count + mode: NULLABLE +- name: event_count type: INTEGER -- mode: NULLABLE - name: user_count + mode: NULLABLE +- name: user_count type: INTEGER -- mode: NULLABLE - name: query_type + mode: NULLABLE +- name: query_type type: STRING + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml 2024-08-06 02:05:21.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml 2024-08-06 02:14:16.000000000 +0000 @@ -1,40 +1,40 @@ fields: -- mode: NULLABLE - name: submission_date +- name: submission_date type: DATE -- mode: NULLABLE - name: form_factor + mode: NULLABLE +- name: form_factor type: STRING -- mode: NULLABLE - name: country + mode: NULLABLE +- name: country type: STRING -- mode: NULLABLE - name: advertiser + mode: NULLABLE +- name: advertiser type: STRING -- mode: NULLABLE - name: normalized_os + mode: NULLABLE +- name: normalized_os type: STRING -- mode: NULLABLE - name: release_channel + mode: NULLABLE +- name: release_channel type: STRING -- mode: NULLABLE - name: position + mode: NULLABLE +- name: position type: INTEGER -- mode: NULLABLE - name: provider + mode: NULLABLE +- name: provider type: STRING -- mode: NULLABLE - name: match_type + mode: NULLABLE +- name: 
match_type type: STRING -- mode: NULLABLE - name: suggest_data_sharing_enabled + mode: NULLABLE +- name: suggest_data_sharing_enabled type: BOOLEAN -- mode: NULLABLE - name: impression_count + mode: NULLABLE +- name: impression_count type: INTEGER -- mode: NULLABLE - name: click_count + mode: NULLABLE +- name: click_count type: INTEGER -- mode: NULLABLE - name: query_type + mode: NULLABLE +- name: query_type type: STRING + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml 2024-08-06 02:05:21.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_clients/schema.yaml 2024-08-06 02:14:18.000000000 +0000 @@ -26,6 +26,9 @@ - name: adjust_network type: STRING mode: NULLABLE +- name: install_source + type: STRING + mode: NULLABLE - name: retained_week_2 type: BOOLEAN mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml 2024-08-06 02:05:21.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/funnel_retention_week_4/schema.yaml 2024-08-06 02:14:18.000000000 +0000 @@ -48,6 +48,10 @@ description: 'The type of source of a client installation. 
' +- name: install_source + type: STRING + mode: NULLABLE + description: null - name: new_profiles type: INTEGER mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/use_counters/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/use_counters/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/use_counters/schema.yaml 2024-08-06 02:06:10.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/use_counters/schema.yaml 2024-08-06 02:16:20.000000000 +0000 @@ -1,11 +1,11 @@ fields: - name: normalized_app_id - type: STRING mode: NULLABLE + type: STRING description: App ID of the channel data was received from - name: normalized_channel - type: STRING mode: NULLABLE + type: STRING description: Normalized channel name - name: additional_properties type: STRING diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/baseline_clients_daily_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/baseline_clients_daily_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/baseline_clients_daily_v1/schema.yaml 2024-08-06 02:06:11.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/baseline_clients_daily_v1/schema.yaml 2024-08-06 02:08:10.000000000 +0000 @@ -42,9 +42,6 @@ name: country type: STRING - mode: NULLABLE - name: isp - type: STRING -- mode: NULLABLE name: app_build type: STRING - mode: NULLABLE @@ -72,6 +69,9 @@ name: is_new_profile type: BOOLEAN - mode: NULLABLE + name: isp + type: STRING +- mode: NULLABLE name: distribution_id type: STRING - mode: NULLABLE diff -bur --no-dereference --new-file 
/tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/baseline_clients_last_seen_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/baseline_clients_last_seen_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/baseline_clients_last_seen_v1/schema.yaml 2024-08-06 02:06:11.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_defaultagent_derived/baseline_clients_last_seen_v1/schema.yaml 2024-08-06 02:08:08.000000000 +0000 @@ -33,9 +33,6 @@ type: STRING - name: country type: STRING -- mode: NULLABLE - name: isp - type: STRING - name: app_build type: STRING - name: app_channel @@ -54,6 +51,8 @@ type: DATE - name: is_new_profile type: BOOLEAN +- name: isp + type: STRING - mode: NULLABLE name: distribution_id type: STRING diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/baseline_clients_daily_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/baseline_clients_daily_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/baseline_clients_daily_v1/schema.yaml 2024-08-06 02:06:10.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/baseline_clients_daily_v1/schema.yaml 2024-08-06 02:08:10.000000000 +0000 @@ -42,9 +42,6 @@ name: country type: STRING - mode: NULLABLE - name: isp - type: STRING -- mode: NULLABLE name: app_build type: STRING - mode: NULLABLE @@ -72,6 +69,9 @@ name: is_new_profile type: BOOLEAN - mode: NULLABLE + name: isp + type: STRING +- mode: NULLABLE name: distribution_id type: STRING - mode: NULLABLE diff -bur --no-dereference 
--new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/baseline_clients_last_seen_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/baseline_clients_last_seen_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/baseline_clients_last_seen_v1/schema.yaml 2024-08-06 02:06:10.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/baseline_clients_last_seen_v1/schema.yaml 2024-08-06 02:08:09.000000000 +0000 @@ -33,9 +33,6 @@ type: STRING - name: country type: STRING -- mode: NULLABLE - name: isp - type: STRING - name: app_build type: STRING - name: app_channel @@ -54,6 +51,8 @@ type: DATE - name: is_new_profile type: BOOLEAN +- name: isp + type: STRING - mode: NULLABLE name: distribution_id type: STRING diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/baseline_clients_daily_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/baseline_clients_daily_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/baseline_clients_daily_v1/schema.yaml 2024-08-06 02:06:11.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/baseline_clients_daily_v1/schema.yaml 2024-08-06 02:08:11.000000000 +0000 @@ -42,9 +42,6 @@ name: country type: STRING - mode: NULLABLE - name: isp - type: STRING -- mode: NULLABLE name: app_build type: STRING - mode: NULLABLE @@ -72,6 +69,9 @@ name: is_new_profile type: BOOLEAN - mode: NULLABLE + name: isp + type: STRING +- mode: NULLABLE name: distribution_id type: STRING - mode: NULLABLE diff -bur --no-dereference --new-file 
/tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/baseline_clients_last_seen_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/baseline_clients_last_seen_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/baseline_clients_last_seen_v1/schema.yaml 2024-08-06 02:06:11.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_update_derived/baseline_clients_last_seen_v1/schema.yaml 2024-08-06 02:08:10.000000000 +0000 @@ -33,9 +33,6 @@ type: STRING - name: country type: STRING -- mode: NULLABLE - name: isp - type: STRING - name: app_build type: STRING - name: app_channel @@ -54,6 +51,8 @@ type: DATE - name: is_new_profile type: BOOLEAN +- name: isp + type: STRING - mode: NULLABLE name: distribution_id type: STRING diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/baseline_clients_daily_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/baseline_clients_daily_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/baseline_clients_daily_v1/schema.yaml 2024-08-06 02:06:10.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/baseline_clients_daily_v1/schema.yaml 2024-08-06 02:08:11.000000000 +0000 @@ -42,9 +42,6 @@ name: country type: STRING - mode: NULLABLE - name: isp - type: STRING -- mode: NULLABLE name: app_build type: STRING - mode: NULLABLE @@ -72,6 +69,9 @@ name: is_new_profile type: BOOLEAN - mode: NULLABLE + name: isp + type: STRING +- mode: NULLABLE name: distribution_id type: STRING - mode: NULLABLE diff -bur --no-dereference --new-file 
/tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/baseline_clients_last_seen_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/baseline_clients_last_seen_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/baseline_clients_last_seen_v1/schema.yaml 2024-08-06 02:06:10.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/baseline_clients_last_seen_v1/schema.yaml 2024-08-06 02:08:11.000000000 +0000 @@ -33,9 +33,6 @@ type: STRING - name: country type: STRING -- mode: NULLABLE - name: isp - type: STRING - name: app_build type: STRING - name: app_channel @@ -54,6 +51,8 @@ type: DATE - name: is_new_profile type: BOOLEAN +- name: isp + type: STRING - mode: NULLABLE name: distribution_id type: STRING diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml 2024-08-06 02:05:21.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_page/schema.yaml 2024-08-06 02:16:54.000000000 +0000 @@ -6,9 +6,10 @@ - name: site_url type: STRING mode: NULLABLE - description: |- - For domain properties, this will be `sc-domain:` followed by the domain name. - For URL-prefix properties, it will be the full URL of the property definition. + description: 'For domain properties, this will be `sc-domain:` followed by the domain + name. + + For URL-prefix properties, it will be the full URL of the property definition.' 
- name: site_domain_name type: STRING mode: NULLABLE @@ -16,103 +17,110 @@ - name: page_url type: STRING mode: NULLABLE - description: |- - The final page URL linked by a search result after any skip redirects. - This will be null for anonymized Discover impressions. + description: 'The final page URL linked by a search result after any skip redirects. + + This will be null for anonymized Discover impressions.' - name: page_domain_name type: STRING mode: NULLABLE - description: |- - Domain name of the page URL. - This will be null for anonymized Discover impressions. + description: 'Domain name of the page URL. + + This will be null for anonymized Discover impressions.' - name: page_path type: STRING mode: NULLABLE - description: |- - The path part of the page URL. - This will be null for anonymized Discover impressions. + description: 'The path part of the page URL. + + This will be null for anonymized Discover impressions.' - name: localized_site_code type: STRING mode: NULLABLE - description: |- - Localized site code such as `en-US` or `de` found in the first segment of the page URL path (if any). - This will be null for anonymized Discover impressions. + description: 'Localized site code such as `en-US` or `de` found in the first segment + of the page URL path (if any). + + This will be null for anonymized Discover impressions.' - name: localized_site type: STRING mode: NULLABLE - description: |- - Description of the localized site language and/or country based on `localized_site_code` (if any). - This will be null for anonymized Discover impressions. + description: 'Description of the localized site language and/or country based on + `localized_site_code` (if any). + + This will be null for anonymized Discover impressions.' - name: localized_site_language_code type: STRING mode: NULLABLE - description: |- - Localized site language code in ISO-639-alpha-2 format found in the first segment of the page URL path (if any). 
- This will be null for anonymized Discover impressions. + description: 'Localized site language code in ISO-639-alpha-2 format found in the + first segment of the page URL path (if any). + + This will be null for anonymized Discover impressions.' - name: localized_site_language type: STRING mode: NULLABLE - description: |- - Localized site language based on `localized_site_language_code` (if any). - This will be null for anonymized Discover impressions. + description: 'Localized site language based on `localized_site_language_code` (if + any). + + This will be null for anonymized Discover impressions.' - name: query type: STRING mode: NULLABLE - description: |- - The search query. - This will be null for anonymized search impressions, and all Discover and Google News search impressions. + description: 'The search query. + + This will be null for anonymized search impressions, and all Discover and Google + News search impressions.' - name: query_type type: STRING mode: NULLABLE - description: |- - Type of search query: - * Anonymized: Query was redacted by Google to protect the users' privacy. - * Brand: Query contained one or more Mozilla brand keywords. - * Non-Brand: Query didn't contain any Mozilla brand keywords. - * Unknown: Query couldn't be classified. - This will be null for all Discover and Google News search impressions. + description: "Type of search query:\n * Anonymized: Query was redacted by Google\ + \ to protect the users' privacy.\n * Brand: Query contained one or more Mozilla\ + \ brand keywords.\n * Non-Brand: Query didn't contain any Mozilla brand keywords.\n\ + \ * Unknown: Query couldn't be classified.\nThis will be null for all Discover\ + \ and Google News search impressions." - name: is_anonymized type: BOOLEAN mode: NULLABLE - description: |- - Whether Google has anonymized the search impression to protect the users' privacy. + description: 'Whether Google has anonymized the search impression to protect the + users'' privacy. 
+ The `query` field will be null for anonymized search impressions. - The `country_code`, `page_url`, and related fields will be null for anonymized Discover impressions. + + The `country_code`, `page_url`, and related fields will be null for anonymized + Discover impressions.' - name: has_good_page_experience type: BOOLEAN mode: NULLABLE - description: |- - Whether Google Search considers the page to be providing a good page experience. - This will be null when the source data wasn't exported directly to BigQuery by Google. + description: 'Whether Google Search considers the page to be providing a good page + experience. + + This will be null when the source data wasn''t exported directly to BigQuery by + Google.' - name: search_type type: STRING mode: NULLABLE - description: |- - Where the link was seen by the user: - * Web: In Google Search's default "All" tab. - * Image: In Google Search's "Images" tab. - * Video: In Google Search's "Videos" tab. - * News: In Google Search's "News" tab. - * Discover: In Google's Discover feed. - * Google News: On news.google.com or in the Google News app on Android and iOS. + description: "Where the link was seen by the user:\n * Web: In Google Search's\ + \ default \"All\" tab.\n * Image: In Google Search's \"Images\" tab.\n * Video:\ + \ In Google Search's \"Videos\" tab.\n * News: In Google Search's \"News\" tab.\n\ + \ * Discover: In Google's Discover feed.\n * Google News: On news.google.com\ + \ or in the Google News app on Android and iOS." - name: search_appearance type: STRING mode: NULLABLE - description: |- - How the search result appeared (e.g. normal result, translated result, video). - This will be null when the source data wasn't exported directly to BigQuery by Google. + description: 'How the search result appeared (e.g. normal result, translated result, + video). + + This will be null when the source data wasn''t exported directly to BigQuery by + Google.' 
- name: user_country_code type: STRING mode: NULLABLE - description: |- - Country from which the user was searching, in ISO-3166-1-alpha-3 format. - This will be null for anonymized Discover impressions. + description: 'Country from which the user was searching, in ISO-3166-1-alpha-3 format. + + This will be null for anonymized Discover impressions.' - name: user_country type: STRING mode: NULLABLE - description: |- - Country from which the user was searching. - This will be null for anonymized Discover impressions. + description: 'Country from which the user was searching. + + This will be null for anonymized Discover impressions.' - name: user_region type: STRING mode: NULLABLE @@ -124,13 +132,15 @@ - name: device_type type: STRING mode: NULLABLE - description: |- - The type of device on which the user was searching: Desktop, Mobile, or Tablet. - This will be null for Discover impressions. + description: 'The type of device on which the user was searching: Desktop, Mobile, + or Tablet. + + This will be null for Discover impressions.' - name: impressions type: INTEGER mode: NULLABLE - description: The number of times that search results with a link to the page were shown to a user. + description: The number of times that search results with a link to the page were + shown to a user. - name: clicks type: INTEGER mode: NULLABLE @@ -138,6 +148,7 @@ - name: average_position type: FLOAT mode: NULLABLE - description: |- - The average position of the page in the search results, where `1` is the topmost position. - This will be null for Discover and Google News search impressions. + description: 'The average position of the page in the search results, where `1` + is the topmost position. + + This will be null for Discover and Google News search impressions.' 
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/schema.yaml 2024-08-06 02:05:21.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/google_search_console/search_impressions_by_site/schema.yaml 2024-08-06 02:16:54.000000000 +0000 @@ -6,9 +6,10 @@ - name: site_url type: STRING mode: NULLABLE - description: |- - For domain properties, this will be `sc-domain:` followed by the domain name. - For URL-prefix properties, it will be the full URL of the property definition. + description: 'For domain properties, this will be `sc-domain:` followed by the domain + name. + + For URL-prefix properties, it will be the full URL of the property definition.' - name: site_domain_name type: STRING mode: NULLABLE @@ -20,27 +21,23 @@ - name: query_type type: STRING mode: NULLABLE - description: |- - Type of search query: - * Anonymized: Query was redacted by Google to protect the users' privacy. - * Brand: Query contained one or more Mozilla brand keywords. - * Non-Brand: Query didn't contain any Mozilla brand keywords. - * Unknown: Query couldn't be classified. + description: "Type of search query:\n * Anonymized: Query was redacted by Google\ + \ to protect the users' privacy.\n * Brand: Query contained one or more Mozilla\ + \ brand keywords.\n * Non-Brand: Query didn't contain any Mozilla brand keywords.\n\ + \ * Unknown: Query couldn't be classified." - name: is_anonymized type: BOOLEAN mode: NULLABLE - description: |- - Whether Google has anonymized the search impression to protect the users' privacy. - The `query` field will be null for anonymized search impressions. 
+ description: 'Whether Google has anonymized the search impression to protect the + users'' privacy. + + The `query` field will be null for anonymized search impressions.' - name: search_type type: STRING mode: NULLABLE - description: |- - Where the link was seen by the user: - * Web: In Google Search's default "All" tab. - * Image: In Google Search's "Images" tab. - * Video: In Google Search's "Videos" tab. - * News: In Google Search's "News" tab. + description: "Where the link was seen by the user:\n * Web: In Google Search's\ + \ default \"All\" tab.\n * Image: In Google Search's \"Images\" tab.\n * Video:\ + \ In Google Search's \"Videos\" tab.\n * News: In Google Search's \"News\" tab." - name: user_country_code type: STRING mode: NULLABLE @@ -60,17 +57,20 @@ - name: device_type type: STRING mode: NULLABLE - description: |- - The type of device on which the user was searching: Desktop, Mobile, or Tablet. + description: 'The type of device on which the user was searching: Desktop, Mobile, + or Tablet.' - name: impressions type: INTEGER mode: NULLABLE - description: The number of times that search results with at least one link to the site were shown to a user. + description: The number of times that search results with at least one link to the + site were shown to a user. - name: clicks type: INTEGER mode: NULLABLE - description: The number of times a user clicked at least one search result link to the site. + description: The number of times a user clicked at least one search result link + to the site. - name: average_top_position type: FLOAT mode: NULLABLE - description: The average top position of the site in the search results, where `1` is the topmost position. + description: The average top position of the site in the search results, where `1` + is the topmost position. 
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitor_cirrus_derived/baseline_clients_daily_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitor_cirrus_derived/baseline_clients_daily_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitor_cirrus_derived/baseline_clients_daily_v1/schema.yaml 2024-08-06 02:06:11.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitor_cirrus_derived/baseline_clients_daily_v1/schema.yaml 2024-08-06 02:08:12.000000000 +0000 @@ -42,9 +42,6 @@ name: country type: STRING - mode: NULLABLE - name: isp - type: STRING -- mode: NULLABLE name: app_build type: STRING - mode: NULLABLE @@ -72,6 +69,9 @@ name: is_new_profile type: BOOLEAN - mode: NULLABLE + name: isp + type: STRING +- mode: NULLABLE name: distribution_id type: STRING - mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitor_cirrus_derived/baseline_clients_last_seen_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitor_cirrus_derived/baseline_clients_last_seen_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitor_cirrus_derived/baseline_clients_last_seen_v1/schema.yaml 2024-08-06 02:06:11.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitor_cirrus_derived/baseline_clients_last_seen_v1/schema.yaml 2024-08-06 02:08:11.000000000 +0000 @@ -33,9 +33,6 @@ type: STRING - name: country type: STRING -- mode: NULLABLE - name: isp - type: STRING - name: app_build type: STRING - name: app_channel @@ -54,6 +51,8 @@ type: DATE - name: is_new_profile type: BOOLEAN +- name: isp + type: STRING - mode: NULLABLE name: distribution_id type: STRING diff -bur --no-dereference --new-file 
/tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/event_monitoring_live_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/event_monitoring_live_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/event_monitoring_live_v1/metadata.yaml 2024-08-06 02:06:49.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/event_monitoring_live_v1/metadata.yaml 2024-08-06 02:23:36.000000000 +0000 @@ -1,10 +1,19 @@ +friendly_name: Event Monitoring Live +description: |- + Materialized view of experimentation related events + coming from monitor_frontend. +owners: +- ascholtz@mozilla.com +- akomar@mozilla.com +labels: + materialized_view: true + owner1: ascholtz + owner2: akomar +bigquery: null workgroup_access: - role: roles/bigquery.dataViewer members: - workgroup:mozilla-confidential - - workgroup:dataops-managed/external-fides - -# Generated by bigquery_etl.dependency references: materialized_view.sql: - moz-fx-data-shared-prod.monitor_frontend_live.events_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/monitor_dashboard_user_journey_funnels_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/monitor_dashboard_user_journey_funnels_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/monitor_dashboard_user_journey_funnels_v1/metadata.yaml 2024-08-06 02:06:49.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitor_frontend_derived/monitor_dashboard_user_journey_funnels_v1/metadata.yaml 2024-08-06 02:23:36.000000000 +0000 @@ -1,10 +1,26 @@ +friendly_name: Monitor Dashboard User Journey Funnels +description: |- + Please provide a description for the query +owners: +- ksiegler@mozilla.org 
+labels: + incremental: true + dag: bqetl_generated_funnels + owner1: ksiegler +scheduling: + dag_name: bqetl_generated_funnels +bigquery: + time_partitioning: + type: day + field: submission_date + require_partition_filter: false + expiration_days: null + range_partitioning: null + clustering: null workgroup_access: - role: roles/bigquery.dataViewer members: - workgroup:mozilla-confidential - - workgroup:dataops-managed/external-fides - -# Generated by bigquery_etl.dependency references: query.sql: - mozdata.monitor_frontend.events_unnested diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitoring_derived/table_partition_expirations_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitoring_derived/table_partition_expirations_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitoring_derived/table_partition_expirations_v1/metadata.yaml 2024-08-06 02:06:11.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/monitoring_derived/table_partition_expirations_v1/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 @@ -1,102 +0,0 @@ -friendly_name: Table Partition Expirations -description: |- - Earliest partitions and partition expiration info per stable table per day. 
-owners: -- bewu@mozilla.com -labels: - incremental: true - owner1: bewu - dag: bqetl_monitoring -scheduling: - dag_name: bqetl_monitoring - depends_on_past: true -bigquery: - time_partitioning: - type: day - field: run_date - require_partition_filter: false - expiration_days: null - range_partitioning: null - clustering: null -workgroup_access: -- role: roles/bigquery.dataViewer - members: - - workgroup:mozilla-confidential -references: - query.sql: - - moz-fx-data-shared-prod.accounts_backend_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.accounts_cirrus_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.accounts_frontend_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.activity_stream_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.ads_backend_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.bedrock_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.burnham_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.contextual_services_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.coverage_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.debug_ping_view_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.default_browser_agent_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.eng_workflow_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.firefox_accounts_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.firefox_desktop_background_defaultagent_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.firefox_desktop_background_tasks_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.firefox_desktop_background_update_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.firefox_desktop_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.firefox_installer_stable.INFORMATION_SCHEMA.PARTITIONS - - 
moz-fx-data-shared-prod.firefox_launcher_process_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.firefox_translations_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.glean_dictionary_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.gleanjs_docs_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.mdn_yari_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.messaging_system_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.mlhackweek_search_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.mobile_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.monitor_backend_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.monitor_cirrus_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.monitor_frontend_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.monitoring_derived.table_partition_expirations_v1 - - moz-fx-data-shared-prod.moso_mastodon_backend_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.moso_mastodon_web_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.mozilla_lockbox_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.mozilla_mach_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.mozillavpn_backend_cirrus_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.mozillavpn_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.mozphab_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.org_mozilla_bergamot_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.org_mozilla_connect_firefox_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.org_mozilla_fenix_nightly_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.org_mozilla_fenix_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.org_mozilla_fennec_aurora_stable.INFORMATION_SCHEMA.PARTITIONS - - 
moz-fx-data-shared-prod.org_mozilla_firefox_beta_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.org_mozilla_firefox_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.org_mozilla_firefox_vpn_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.org_mozilla_firefoxreality_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.org_mozilla_focus_beta_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.org_mozilla_focus_nightly_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.org_mozilla_focus_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.org_mozilla_ios_fennec_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.org_mozilla_ios_firefox_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.org_mozilla_ios_firefoxbeta_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.org_mozilla_ios_firefoxvpn_network_extension_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.org_mozilla_ios_firefoxvpn_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.org_mozilla_ios_focus_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.org_mozilla_ios_klar_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.org_mozilla_ios_lockbox_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.org_mozilla_ios_tiktok_reporter_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod.org_mozilla_ios_tiktok_reporter_tiktok_reportershare_stable.INFORMATION_SCHEMA.PARTITIONS - - moz-fx-data-shared-prod. ```

⚠️ Only part of the diff is displayed.

Link to full diff

dataops-ci-bot commented 1 month ago

Integration report for "Fix format error"

sql.diff

Click to expand! ```diff Only in /tmp/workspace/main-generated-sql/dags/: bqetl_merino_newtab_extract_to_gcs.py diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py --- /tmp/workspace/main-generated-sql/dags/bqetl_analytics_tables.py 2024-08-15 11:53:05.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_analytics_tables.py 2024-08-15 12:12:39.000000000 +0000 @@ -324,6 +324,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_conv_evnt_categorization__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_desktop_conv_evnt_categorization", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_google_analytics_derived_ga4__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", external_dag_id="bqetl_google_analytics_derived_ga4", external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", @@ -331,6 +338,13 @@ ) ExternalTaskMarker( + task_id="bqetl_search__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_search", + external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=82800)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_main_summary__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", external_dag_id="bqetl_main_summary", external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", @@ -344,6 +358,13 @@ ) ExternalTaskMarker( + task_id="bqetl_desktop_engagement_model__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_desktop_engagement_model", + 
external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", + ) + + ExternalTaskMarker( task_id="bqetl_desktop_retention_model__wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", external_dag_id="bqetl_desktop_retention_model", external_task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", @@ -460,32 +481,6 @@ parameters=["submission_date:DATE:{{ds}}"], ) - with TaskGroup( - "clients_first_seen_v3_external", - ) as clients_first_seen_v3_external: - ExternalTaskMarker( - task_id="bqetl_desktop_conv_evnt_categorization__wait_for_clients_first_seen_v3", - external_dag_id="bqetl_desktop_conv_evnt_categorization", - external_task_id="wait_for_clients_first_seen_v3", - execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", - ) - - ExternalTaskMarker( - task_id="bqetl_search__wait_for_clients_first_seen_v3", - external_dag_id="bqetl_search", - external_task_id="wait_for_clients_first_seen_v3", - execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=82800)).isoformat() }}", - ) - - ExternalTaskMarker( - task_id="bqetl_desktop_engagement_model__wait_for_clients_first_seen_v3", - external_dag_id="bqetl_desktop_engagement_model", - external_task_id="wait_for_clients_first_seen_v3", - execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=50400)).isoformat() }}", - ) - - clients_first_seen_v3_external.set_upstream(clients_first_seen_v3) - fenix_derived__funnel_retention_clients_week_2__v1 = bigquery_etl_query( task_id="fenix_derived__funnel_retention_clients_week_2__v1", destination_table="funnel_retention_clients_week_2_v1", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py --- 
/tmp/workspace/main-generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 2024-08-15 11:53:05.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_desktop_conv_evnt_categorization.py 2024-08-15 12:12:41.000000000 +0000 @@ -50,10 +50,11 @@ tags=tags, ) as dag: - wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 = ExternalTaskSensor( - task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", - external_dag_id="bqetl_main_summary", - external_task_id="checks__fail_telemetry_derived__clients_last_seen__v2", + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 = ( + ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_analytics_tables", + external_task_id="checks__fail_telemetry_derived__clients_first_seen__v2", execution_delta=datetime.timedelta(seconds=36000), check_existence=True, mode="reschedule", @@ -61,11 +62,12 @@ failed_states=FAILED_STATES, pool="DATA_ENG_EXTERNALTASKSENSOR", ) + ) - wait_for_clients_first_seen_v3 = ExternalTaskSensor( - task_id="wait_for_clients_first_seen_v3", - external_dag_id="bqetl_analytics_tables", - external_task_id="clients_first_seen_v3", + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 = ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_main_summary", + external_task_id="checks__fail_telemetry_derived__clients_last_seen__v2", execution_delta=datetime.timedelta(seconds=36000), check_existence=True, mode="reschedule", @@ -131,11 +133,11 @@ ) google_ads_derived__conversion_event_categorization__v1.set_upstream( - wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 ) google_ads_derived__conversion_event_categorization__v1.set_upstream( - wait_for_clients_first_seen_v3 + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 ) 
google_ads_derived__conversion_event_categorization__v1.set_upstream( diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_desktop_engagement_model.py /tmp/workspace/generated-sql/dags/bqetl_desktop_engagement_model.py --- /tmp/workspace/main-generated-sql/dags/bqetl_desktop_engagement_model.py 2024-08-15 11:53:05.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_desktop_engagement_model.py 2024-08-15 12:12:41.000000000 +0000 @@ -50,10 +50,11 @@ tags=tags, ) as dag: - wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 = ExternalTaskSensor( - task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", - external_dag_id="bqetl_main_summary", - external_task_id="checks__fail_telemetry_derived__clients_last_seen__v2", + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 = ( + ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", + external_dag_id="bqetl_analytics_tables", + external_task_id="checks__fail_telemetry_derived__clients_first_seen__v2", execution_delta=datetime.timedelta(seconds=36000), check_existence=True, mode="reschedule", @@ -61,11 +62,12 @@ failed_states=FAILED_STATES, pool="DATA_ENG_EXTERNALTASKSENSOR", ) + ) - wait_for_clients_first_seen_v3 = ExternalTaskSensor( - task_id="wait_for_clients_first_seen_v3", - external_dag_id="bqetl_analytics_tables", - external_task_id="clients_first_seen_v3", + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 = ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_last_seen__v2", + external_dag_id="bqetl_main_summary", + external_task_id="checks__fail_telemetry_derived__clients_last_seen__v2", execution_delta=datetime.timedelta(seconds=36000), check_existence=True, mode="reschedule", @@ -101,9 +103,9 @@ ) telemetry_derived__desktop_engagement_clients__v1.set_upstream( - wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 + 
wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 ) telemetry_derived__desktop_engagement_clients__v1.set_upstream( - wait_for_clients_first_seen_v3 + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_generated_funnels.py /tmp/workspace/generated-sql/dags/bqetl_generated_funnels.py --- /tmp/workspace/main-generated-sql/dags/bqetl_generated_funnels.py 2024-08-15 11:53:05.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_generated_funnels.py 2024-08-15 12:12:40.000000000 +0000 @@ -288,6 +288,21 @@ ) ) + monitor_frontend_derived__monitor_dashboard_user_journey_funnels__v1 = bigquery_etl_query( + task_id="monitor_frontend_derived__monitor_dashboard_user_journey_funnels__v1", + destination_table="monitor_dashboard_user_journey_funnels_v1", + dataset_id="monitor_frontend_derived", + project_id="moz-fx-data-shared-prod", + owner="ksiegler@mozilla.org", + email=[ + "ascholtz@mozilla.com", + "ksiegler@mozilla.org", + "telemetry-alerts@mozilla.com", + ], + date_partition_parameter="submission_date", + depends_on_past=False, + ) + accounts_frontend_derived__email_first_reg_login_funnels_by_service__v1.set_upstream( wait_for_copy_deduplicate_all ) @@ -349,3 +364,7 @@ firefox_accounts_derived__registration_funnels_legacy_events__v1.set_upstream( wait_for_firefox_accounts_derived__fxa_stdout_events__v1 ) + + monitor_frontend_derived__monitor_dashboard_user_journey_funnels__v1.set_upstream( + wait_for_copy_deduplicate_all + ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_google_analytics_derived_ga4.py /tmp/workspace/generated-sql/dags/bqetl_google_analytics_derived_ga4.py --- /tmp/workspace/main-generated-sql/dags/bqetl_google_analytics_derived_ga4.py 2024-08-15 11:53:05.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_google_analytics_derived_ga4.py 2024-08-15 12:12:40.000000000 +0000 @@ -67,6 +67,19 @@ 
retry_delay=datetime.timedelta(seconds=1800), ) + wait_for_wmo_events_table = BigQueryTableExistenceSensor( + task_id="wait_for_wmo_events_table", + project_id="moz-fx-data-marketing-prod", + dataset_id="analytics_313696158", + table_id="events_{{ ds_nodash }}", + gcp_conn_id="google_cloud_shared_prod", + deferrable=True, + poke_interval=datetime.timedelta(seconds=1800), + timeout=datetime.timedelta(seconds=36000), + retries=1, + retry_delay=datetime.timedelta(seconds=1800), + ) + wait_for_checks__fail_stub_attribution_service_derived__dl_token_ga_attribution_lookup__v1 = ExternalTaskSensor( task_id="wait_for_checks__fail_stub_attribution_service_derived__dl_token_ga_attribution_lookup__v1", external_dag_id="bqetl_mozilla_org_derived", @@ -118,19 +131,6 @@ pool="DATA_ENG_EXTERNALTASKSENSOR", ) - wait_for_wmo_events_table = BigQueryTableExistenceSensor( - task_id="wait_for_wmo_events_table", - project_id="moz-fx-data-marketing-prod", - dataset_id="analytics_313696158", - table_id="events_{{ ds_nodash }}", - gcp_conn_id="google_cloud_shared_prod", - deferrable=True, - poke_interval=datetime.timedelta(seconds=1800), - timeout=datetime.timedelta(seconds=36000), - retries=1, - retry_delay=datetime.timedelta(seconds=1800), - ) - checks__fail_mozilla_org_derived__ga_clients__v2 = bigquery_dq_check( task_id="checks__fail_mozilla_org_derived__ga_clients__v2", source_table="ga_clients_v2", @@ -206,19 +206,6 @@ retries=0, ) - checks__warn_mozilla_org_derived__blogs_goals__v2 = bigquery_dq_check( - task_id="checks__warn_mozilla_org_derived__blogs_goals__v2", - source_table="blogs_goals_v2", - dataset_id="mozilla_org_derived", - project_id="moz-fx-data-shared-prod", - is_dq_check_fail=False, - owner="kwindau@mozilla.com", - email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], - depends_on_past=False, - parameters=["submission_date:DATE:{{ds}}"], - retries=0, - ) - checks__warn_mozilla_org_derived__ga_sessions__v2 = bigquery_dq_check( 
task_id="checks__warn_mozilla_org_derived__ga_sessions__v2", source_table="ga_sessions_v2", @@ -233,23 +220,6 @@ retries=0, ) - checks__warn_mozilla_org_derived__www_site_hits__v2 = bigquery_dq_check( - task_id="checks__warn_mozilla_org_derived__www_site_hits__v2", - source_table="www_site_hits_v2", - dataset_id="mozilla_org_derived", - project_id="moz-fx-data-shared-prod", - is_dq_check_fail=False, - owner="mhirose@mozilla.com", - email=[ - "kwindau@mozilla.com", - "mhirose@mozilla.com", - "telemetry-alerts@mozilla.com", - ], - depends_on_past=False, - parameters=["submission_date:DATE:{{ds}}"], - retries=0, - ) - ga_derived__blogs_daily_summary__v2 = bigquery_etl_query( task_id="ga_derived__blogs_daily_summary__v2", destination_table="blogs_daily_summary_v2", @@ -371,77 +341,6 @@ depends_on_past=False, ) - mozilla_org_derived__blogs_daily_summary__v2 = bigquery_etl_query( - task_id="mozilla_org_derived__blogs_daily_summary__v2", - destination_table="blogs_daily_summary_v2", - dataset_id="mozilla_org_derived", - project_id="moz-fx-data-shared-prod", - owner="mhirose@mozilla.com", - email=[ - "kwindau@mozilla.com", - "mhirose@mozilla.com", - "telemetry-alerts@mozilla.com", - ], - date_partition_parameter="submission_date", - depends_on_past=False, - ) - - mozilla_org_derived__blogs_goals__v2 = bigquery_etl_query( - task_id="mozilla_org_derived__blogs_goals__v2", - destination_table="blogs_goals_v2", - dataset_id="mozilla_org_derived", - project_id="moz-fx-data-shared-prod", - owner="kwindau@mozilla.com", - email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], - date_partition_parameter="submission_date", - depends_on_past=False, - ) - - mozilla_org_derived__blogs_landing_page_summary__v2 = bigquery_etl_query( - task_id="mozilla_org_derived__blogs_landing_page_summary__v2", - destination_table="blogs_landing_page_summary_v2", - dataset_id="mozilla_org_derived", - project_id="moz-fx-data-shared-prod", - owner="mhirose@mozilla.com", - email=[ - 
"kwindau@mozilla.com", - "mhirose@mozilla.com", - "telemetry-alerts@mozilla.com", - ], - date_partition_parameter="submission_date", - depends_on_past=False, - ) - - mozilla_org_derived__blogs_sessions__v2 = bigquery_etl_query( - task_id="mozilla_org_derived__blogs_sessions__v2", - destination_table="blogs_sessions_v2", - dataset_id="mozilla_org_derived", - project_id="moz-fx-data-shared-prod", - owner="mhirose@mozilla.com", - email=[ - "kwindau@mozilla.com", - "mhirose@mozilla.com", - "telemetry-alerts@mozilla.com", - ], - date_partition_parameter="submission_date", - depends_on_past=False, - ) - - mozilla_org_derived__firefox_whatsnew_summary__v2 = bigquery_etl_query( - task_id="mozilla_org_derived__firefox_whatsnew_summary__v2", - destination_table="firefox_whatsnew_summary_v2", - dataset_id="mozilla_org_derived", - project_id="moz-fx-data-shared-prod", - owner="mhirose@mozilla.com", - email=[ - "kwindau@mozilla.com", - "mhirose@mozilla.com", - "telemetry-alerts@mozilla.com", - ], - date_partition_parameter="submission_date", - depends_on_past=False, - ) - mozilla_org_derived__ga_clients__v2 = bigquery_etl_query( task_id="mozilla_org_derived__ga_clients__v2", destination_table="ga_clients_v2", @@ -498,66 +397,10 @@ depends_on_past=False, ) - mozilla_org_derived__www_site_events_metrics__v2 = bigquery_etl_query( - task_id="mozilla_org_derived__www_site_events_metrics__v2", - destination_table="www_site_events_metrics_v2", - dataset_id="mozilla_org_derived", - project_id="moz-fx-data-shared-prod", - owner="kwindau@mozilla.com", - email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], - date_partition_parameter="submission_date", - depends_on_past=False, - ) - - mozilla_org_derived__www_site_hits__v2 = bigquery_etl_query( - task_id="mozilla_org_derived__www_site_hits__v2", - destination_table="www_site_hits_v2", - dataset_id="mozilla_org_derived", - project_id="moz-fx-data-shared-prod", - owner="mhirose@mozilla.com", - email=[ - "kwindau@mozilla.com", - 
"mhirose@mozilla.com", - "telemetry-alerts@mozilla.com", - ], - date_partition_parameter="submission_date", - depends_on_past=False, - ) - - mozilla_org_derived__www_site_landing_page_metrics__v2 = bigquery_etl_query( - task_id="mozilla_org_derived__www_site_landing_page_metrics__v2", - destination_table="www_site_landing_page_metrics_v2", - dataset_id="mozilla_org_derived", - project_id="moz-fx-data-shared-prod", - owner="mhirose@mozilla.com", - email=[ - "kwindau@mozilla.com", - "mhirose@mozilla.com", - "telemetry-alerts@mozilla.com", - ], - date_partition_parameter="submission_date", - depends_on_past=False, - ) - - mozilla_org_derived__www_site_metrics_summary__v2 = bigquery_etl_query( - task_id="mozilla_org_derived__www_site_metrics_summary__v2", - destination_table="www_site_metrics_summary_v2", - dataset_id="mozilla_org_derived", - project_id="moz-fx-data-shared-prod", - owner="mhirose@mozilla.com", - email=[ - "kwindau@mozilla.com", - "mhirose@mozilla.com", - "telemetry-alerts@mozilla.com", - ], - date_partition_parameter="submission_date", - depends_on_past=False, - ) - - mozilla_org_derived__www_site_page_metrics__v2 = bigquery_etl_query( - task_id="mozilla_org_derived__www_site_page_metrics__v2", - destination_table="www_site_page_metrics_v2", - dataset_id="mozilla_org_derived", + mozilla_vpn_derived__site_metrics_summary__v2 = bigquery_etl_query( + task_id="mozilla_vpn_derived__site_metrics_summary__v2", + destination_table="site_metrics_summary_v2", + dataset_id="mozilla_vpn_derived", project_id="moz-fx-data-shared-prod", owner="kwindau@mozilla.com", email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"], @@ -579,18 +422,10 @@ ga_derived__www_site_hits__v2 ) - checks__warn_mozilla_org_derived__blogs_goals__v2.set_upstream( - mozilla_org_derived__blogs_goals__v2 - ) - checks__warn_mozilla_org_derived__ga_sessions__v2.set_upstream( mozilla_org_derived__ga_sessions__v2 ) - checks__warn_mozilla_org_derived__www_site_hits__v2.set_upstream( - 
mozilla_org_derived__www_site_hits__v2 - ) - ga_derived__blogs_daily_summary__v2.set_upstream(ga_derived__blogs_goals__v2) ga_derived__blogs_daily_summary__v2.set_upstream(ga_derived__blogs_sessions__v2) @@ -623,34 +458,6 @@ ga_derived__www_site_page_metrics__v2.set_upstream(ga_derived__www_site_hits__v2) - mozilla_org_derived__blogs_daily_summary__v2.set_upstream( - mozilla_org_derived__blogs_goals__v2 - ) - - mozilla_org_derived__blogs_daily_summary__v2.set_upstream( - mozilla_org_derived__blogs_sessions__v2 - ) - - mozilla_org_derived__blogs_goals__v2.set_upstream(wait_for_blogs_events_table) - - mozilla_org_derived__blogs_landing_page_summary__v2.set_upstream( - mozilla_org_derived__blogs_goals__v2 - ) - - mozilla_org_derived__blogs_landing_page_summary__v2.set_upstream( - mozilla_org_derived__blogs_sessions__v2 - ) - - mozilla_org_derived__blogs_landing_page_summary__v2.set_upstream( - wait_for_blogs_events_table - ) - - mozilla_org_derived__blogs_sessions__v2.set_upstream(wait_for_blogs_events_table) - - mozilla_org_derived__firefox_whatsnew_summary__v2.set_upstream( - mozilla_org_derived__www_site_hits__v2 - ) - mozilla_org_derived__ga_clients__v2.set_upstream( mozilla_org_derived__ga_sessions__v2 ) @@ -679,20 +486,6 @@ mozilla_org_derived__www_site_downloads__v2.set_upstream(wait_for_wmo_events_table) - mozilla_org_derived__www_site_events_metrics__v2.set_upstream( - mozilla_org_derived__www_site_hits__v2 - ) - - mozilla_org_derived__www_site_hits__v2.set_upstream(wait_for_wmo_events_table) - - mozilla_org_derived__www_site_landing_page_metrics__v2.set_upstream( - mozilla_org_derived__www_site_hits__v2 - ) - - mozilla_org_derived__www_site_metrics_summary__v2.set_upstream( + mozilla_vpn_derived__site_metrics_summary__v2.set_upstream( wait_for_wmo_events_table ) - - mozilla_org_derived__www_site_page_metrics__v2.set_upstream( - mozilla_org_derived__www_site_hits__v2 - ) diff -bur --no-dereference --new-file 
/tmp/workspace/main-generated-sql/dags/bqetl_internal_tooling.py /tmp/workspace/generated-sql/dags/bqetl_internal_tooling.py --- /tmp/workspace/main-generated-sql/dags/bqetl_internal_tooling.py 2024-08-15 11:53:05.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_internal_tooling.py 2024-08-15 12:12:38.000000000 +0000 @@ -64,17 +64,6 @@ pool="DATA_ENG_EXTERNALTASKSENSOR", ) - fxci_derived__task_run_costs__v1 = bigquery_etl_query( - task_id="fxci_derived__task_run_costs__v1", - destination_table="task_run_costs_v1", - dataset_id="fxci_derived", - project_id="moz-fx-data-shared-prod", - owner="ahalberstadt@mozilla.com", - email=["ahalberstadt@mozilla.com", "telemetry-alerts@mozilla.com"], - date_partition_parameter="submission_date", - depends_on_past=False, - ) - fxci_worker_cost__v1 = bigquery_etl_query( task_id="fxci_worker_cost__v1", destination_table="worker_costs_v1", @@ -101,6 +90,4 @@ depends_on_past=False, ) - fxci_derived__task_run_costs__v1.set_upstream(fxci_worker_cost__v1) - mozregression_aggregates__v1.set_upstream(wait_for_copy_deduplicate_all) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_merino_newtab_extract_to_gcs.py /tmp/workspace/generated-sql/dags/bqetl_merino_newtab_extract_to_gcs.py --- /tmp/workspace/main-generated-sql/dags/bqetl_merino_newtab_extract_to_gcs.py 2024-08-15 11:53:05.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_merino_newtab_extract_to_gcs.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,81 +0,0 @@ -# Generated via https://github.com/mozilla/bigquery-etl/blob/main/bigquery_etl/query_scheduling/generate_airflow_dags.py - -from airflow import DAG -from airflow.sensors.external_task import ExternalTaskMarker -from airflow.sensors.external_task import ExternalTaskSensor -from airflow.utils.task_group import TaskGroup -import datetime -from operators.gcp_container_operator import GKEPodOperator -from utils.constants import ALLOWED_STATES, FAILED_STATES -from utils.gcp import 
bigquery_etl_query, bigquery_dq_check - -docs = """ -### bqetl_merino_newtab_extract_to_gcs - -Built from bigquery-etl repo, [`dags/bqetl_merino_newtab_extract_to_gcs.py`](https://github.com/mozilla/bigquery-etl/blob/generated-sql/dags/bqetl_merino_newtab_extract_to_gcs.py) - -#### Description - -Aggregates Newtab engagement data that lands in a GCS bucket for Merino recommendations. - -#### Owner - -cbeck@mozilla.com - -#### Tags - -* impact/tier_1 -* repo/bigquery-etl -""" - - -default_args = { - "owner": "cbeck@mozilla.com", - "start_date": datetime.datetime(2024, 8, 14, 0, 0), - "end_date": None, - "email": ["cbeck@mozilla.com", "gkatre@mozilla.com"], - "depends_on_past": False, - "retry_delay": datetime.timedelta(seconds=300), - "email_on_failure": True, - "email_on_retry": False, - "retries": 2, -} - -tags = ["impact/tier_1", "repo/bigquery-etl"] - -with DAG( - "bqetl_merino_newtab_extract_to_gcs", - default_args=default_args, - schedule_interval="*/20 * * * *", - doc_md=docs, - tags=tags, -) as dag: - - checks__fail_telemetry_derived__newtab_merino_extract__v1 = bigquery_dq_check( - task_id="checks__fail_telemetry_derived__newtab_merino_extract__v1", - source_table="newtab_merino_extract_v1", - dataset_id="telemetry_derived", - project_id="moz-fx-data-shared-prod", - is_dq_check_fail=True, - owner="cbeck@mozilla.com", - email=["cbeck@mozilla.com", "gkatre@mozilla.com"], - depends_on_past=False, - task_concurrency=1, - retries=0, - ) - - telemetry_derived__newtab_merino_extract__v1 = bigquery_etl_query( - task_id="telemetry_derived__newtab_merino_extract__v1", - destination_table="newtab_merino_extract_v1", - dataset_id="telemetry_derived", - project_id="moz-fx-data-shared-prod", - owner="cbeck@mozilla.com", - email=["cbeck@mozilla.com", "gkatre@mozilla.com"], - date_partition_parameter=None, - depends_on_past=False, - task_concurrency=1, - ) - - checks__fail_telemetry_derived__newtab_merino_extract__v1.set_upstream( - 
telemetry_derived__newtab_merino_extract__v1 - ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_mobile_kpi_metrics.py /tmp/workspace/generated-sql/dags/bqetl_mobile_kpi_metrics.py --- /tmp/workspace/main-generated-sql/dags/bqetl_mobile_kpi_metrics.py 2024-08-15 11:53:05.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_mobile_kpi_metrics.py 2024-08-15 12:12:41.000000000 +0000 @@ -718,18 +718,6 @@ task_group=task_group_fenix, ) - fenix_derived__new_profiles__v1 = bigquery_etl_query( - task_id="fenix_derived__new_profiles__v1", - destination_table="new_profiles_v1", - dataset_id="fenix_derived", - project_id="moz-fx-data-shared-prod", - owner="mozilla/kpi_table_reviewers", - email=["kik@mozilla.com", "telemetry-alerts@mozilla.com"], - date_partition_parameter="submission_date", - depends_on_past=False, - task_group=task_group_fenix, - ) - fenix_derived__retention__v1 = bigquery_etl_query( task_id="fenix_derived__retention__v1", destination_table='retention_v1${{ macros.ds_format(macros.ds_add(ds, -27), "%Y-%m-%d", "%Y%m%d") }}', @@ -768,18 +756,6 @@ task_group=task_group_firefox_ios, ) - firefox_ios_derived__new_profiles__v1 = bigquery_etl_query( - task_id="firefox_ios_derived__new_profiles__v1", - destination_table="new_profiles_v1", - dataset_id="firefox_ios_derived", - project_id="moz-fx-data-shared-prod", - owner="mozilla/kpi_table_reviewers", - email=["kik@mozilla.com", "telemetry-alerts@mozilla.com"], - date_partition_parameter="submission_date", - depends_on_past=False, - task_group=task_group_firefox_ios, - ) - firefox_ios_derived__retention__v1 = bigquery_etl_query( task_id="firefox_ios_derived__retention__v1", destination_table='retention_v1${{ macros.ds_format(macros.ds_add(ds, -27), "%Y-%m-%d", "%Y%m%d") }}', @@ -818,18 +794,6 @@ task_group=task_group_focus_android, ) - focus_android_derived__new_profiles__v1 = bigquery_etl_query( - task_id="focus_android_derived__new_profiles__v1", - 
destination_table="new_profiles_v1", - dataset_id="focus_android_derived", - project_id="moz-fx-data-shared-prod", - owner="mozilla/kpi_table_reviewers", - email=["kik@mozilla.com", "telemetry-alerts@mozilla.com"], - date_partition_parameter="submission_date", - depends_on_past=False, - task_group=task_group_focus_android, - ) - focus_android_derived__retention__v1 = bigquery_etl_query( task_id="focus_android_derived__retention__v1", destination_table='retention_v1${{ macros.ds_format(macros.ds_add(ds, -27), "%Y-%m-%d", "%Y%m%d") }}', @@ -868,18 +832,6 @@ task_group=task_group_focus_ios, ) - focus_ios_derived__new_profiles__v1 = bigquery_etl_query( - task_id="focus_ios_derived__new_profiles__v1", - destination_table="new_profiles_v1", - dataset_id="focus_ios_derived", - project_id="moz-fx-data-shared-prod", - owner="mozilla/kpi_table_reviewers", - email=["kik@mozilla.com", "telemetry-alerts@mozilla.com"], - date_partition_parameter="submission_date", - depends_on_past=False, - task_group=task_group_focus_ios, - ) - focus_ios_derived__retention__v1 = bigquery_etl_query( task_id="focus_ios_derived__retention__v1", destination_table='retention_v1${{ macros.ds_format(macros.ds_add(ds, -27), "%Y-%m-%d", "%Y%m%d") }}', @@ -918,18 +870,6 @@ task_group=task_group_klar_android, ) - klar_android_derived__new_profiles__v1 = bigquery_etl_query( - task_id="klar_android_derived__new_profiles__v1", - destination_table="new_profiles_v1", - dataset_id="klar_android_derived", - project_id="moz-fx-data-shared-prod", - owner="mozilla/kpi_table_reviewers", - email=["kik@mozilla.com", "telemetry-alerts@mozilla.com"], - date_partition_parameter="submission_date", - depends_on_past=False, - task_group=task_group_klar_android, - ) - klar_android_derived__retention__v1 = bigquery_etl_query( task_id="klar_android_derived__retention__v1", destination_table='retention_v1${{ macros.ds_format(macros.ds_add(ds, -27), "%Y-%m-%d", "%Y%m%d") }}', @@ -968,18 +908,6 @@ task_group=task_group_klar_ios, 
) - klar_ios_derived__new_profiles__v1 = bigquery_etl_query( - task_id="klar_ios_derived__new_profiles__v1", - destination_table="new_profiles_v1", - dataset_id="klar_ios_derived", - project_id="moz-fx-data-shared-prod", - owner="mozilla/kpi_table_reviewers", - email=["kik@mozilla.com", "telemetry-alerts@mozilla.com"], - date_partition_parameter="submission_date", - depends_on_past=False, - task_group=task_group_klar_ios, - ) - klar_ios_derived__retention__v1 = bigquery_etl_query( task_id="klar_ios_derived__retention__v1", destination_table='retention_v1${{ macros.ds_format(macros.ds_add(ds, -27), "%Y-%m-%d", "%Y%m%d") }}', @@ -1110,28 +1038,6 @@ wait_for_checks__fail_org_mozilla_firefox_derived__baseline_clients_last_seen__v1 ) - fenix_derived__new_profiles__v1.set_upstream( - wait_for_checks__fail_org_mozilla_fenix_derived__baseline_clients_last_seen__v1 - ) - - fenix_derived__new_profiles__v1.set_upstream( - wait_for_checks__fail_org_mozilla_fenix_nightly_derived__baseline_clients_last_seen__v1 - ) - - fenix_derived__new_profiles__v1.set_upstream( - wait_for_checks__fail_org_mozilla_fennec_aurora_derived__baseline_clients_last_seen__v1 - ) - - fenix_derived__new_profiles__v1.set_upstream( - wait_for_checks__fail_org_mozilla_firefox_beta_derived__baseline_clients_last_seen__v1 - ) - - fenix_derived__new_profiles__v1.set_upstream( - wait_for_checks__fail_org_mozilla_firefox_derived__baseline_clients_last_seen__v1 - ) - - fenix_derived__new_profiles__v1.set_upstream(fenix_derived__attribution_clients__v1) - fenix_derived__retention__v1.set_upstream( wait_for_checks__fail_fenix_derived__firefox_android_clients__v1 ) @@ -1208,22 +1114,6 @@ wait_for_checks__fail_org_mozilla_ios_firefoxbeta_derived__baseline_clients_last_seen__v1 ) - firefox_ios_derived__new_profiles__v1.set_upstream( - wait_for_checks__fail_org_mozilla_ios_fennec_derived__baseline_clients_last_seen__v1 - ) - - firefox_ios_derived__new_profiles__v1.set_upstream( - 
wait_for_checks__fail_org_mozilla_ios_firefox_derived__baseline_clients_last_seen__v1 - ) - - firefox_ios_derived__new_profiles__v1.set_upstream( - wait_for_checks__fail_org_mozilla_ios_firefoxbeta_derived__baseline_clients_last_seen__v1 - ) - - firefox_ios_derived__new_profiles__v1.set_upstream( - firefox_ios_derived__attribution_clients__v1 - ) - firefox_ios_derived__retention__v1.set_upstream( wait_for_checks__fail_firefox_ios_derived__firefox_ios_clients__v1 ) @@ -1276,22 +1166,6 @@ wait_for_checks__fail_org_mozilla_focus_nightly_derived__baseline_clients_last_seen__v1 ) - focus_android_derived__new_profiles__v1.set_upstream( - wait_for_checks__fail_org_mozilla_focus_beta_derived__baseline_clients_last_seen__v1 - ) - - focus_android_derived__new_profiles__v1.set_upstream( - wait_for_checks__fail_org_mozilla_focus_derived__baseline_clients_last_seen__v1 - ) - - focus_android_derived__new_profiles__v1.set_upstream( - wait_for_checks__fail_org_mozilla_focus_nightly_derived__baseline_clients_last_seen__v1 - ) - - focus_android_derived__new_profiles__v1.set_upstream( - focus_android_derived__attribution_clients__v1 - ) - focus_android_derived__retention__v1.set_upstream( wait_for_checks__fail_org_mozilla_focus_beta_derived__baseline_clients_last_seen__v1 ) @@ -1324,14 +1198,6 @@ wait_for_checks__fail_org_mozilla_ios_focus_derived__baseline_clients_last_seen__v1 ) - focus_ios_derived__new_profiles__v1.set_upstream( - wait_for_checks__fail_org_mozilla_ios_focus_derived__baseline_clients_last_seen__v1 - ) - - focus_ios_derived__new_profiles__v1.set_upstream( - focus_ios_derived__attribution_clients__v1 - ) - focus_ios_derived__retention__v1.set_upstream( wait_for_checks__fail_org_mozilla_ios_focus_derived__baseline_clients_last_seen__v1 ) @@ -1348,14 +1214,6 @@ wait_for_checks__fail_org_mozilla_klar_derived__baseline_clients_last_seen__v1 ) - klar_android_derived__new_profiles__v1.set_upstream( - 
wait_for_checks__fail_org_mozilla_klar_derived__baseline_clients_last_seen__v1 - ) - - klar_android_derived__new_profiles__v1.set_upstream( - klar_android_derived__attribution_clients__v1 - ) - klar_android_derived__retention__v1.set_upstream( wait_for_checks__fail_org_mozilla_klar_derived__baseline_clients_last_seen__v1 ) @@ -1372,14 +1230,6 @@ wait_for_checks__fail_org_mozilla_ios_klar_derived__baseline_clients_last_seen__v1 ) - klar_ios_derived__new_profiles__v1.set_upstream( - wait_for_checks__fail_org_mozilla_ios_klar_derived__baseline_clients_last_seen__v1 - ) - - klar_ios_derived__new_profiles__v1.set_upstream( - klar_ios_derived__attribution_clients__v1 - ) - klar_ios_derived__retention__v1.set_upstream( wait_for_checks__fail_org_mozilla_ios_klar_derived__baseline_clients_last_seen__v1 ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_mobile_search.py /tmp/workspace/generated-sql/dags/bqetl_mobile_search.py --- /tmp/workspace/main-generated-sql/dags/bqetl_mobile_search.py 2024-08-15 11:53:05.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_mobile_search.py 2024-08-15 12:12:37.000000000 +0000 @@ -80,6 +80,20 @@ depends_on_past=False, ) + with TaskGroup( + "search_derived__mobile_search_aggregates__v1_external", + ) as search_derived__mobile_search_aggregates__v1_external: + ExternalTaskMarker( + task_id="bqetl_search_dashboard__wait_for_search_derived__mobile_search_aggregates__v1", + external_dag_id="bqetl_search_dashboard", + external_task_id="wait_for_search_derived__mobile_search_aggregates__v1", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=77400)).isoformat() }}", + ) + + search_derived__mobile_search_aggregates__v1_external.set_upstream( + search_derived__mobile_search_aggregates__v1 + ) + search_derived__mobile_search_clients_daily__v1 = bigquery_etl_query( task_id="search_derived__mobile_search_clients_daily__v1", destination_table="mobile_search_clients_daily_v1", diff -bur 
--no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_monitoring.py /tmp/workspace/generated-sql/dags/bqetl_monitoring.py --- /tmp/workspace/main-generated-sql/dags/bqetl_monitoring.py 2024-08-15 11:53:05.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_monitoring.py 2024-08-15 12:12:38.000000000 +0000 @@ -295,17 +295,6 @@ email=["amiyaguchi@mozilla.com", "ascholtz@mozilla.com"], ) - monitoring_derived__table_partition_expirations__v1 = bigquery_etl_query( - task_id="monitoring_derived__table_partition_expirations__v1", - destination_table="table_partition_expirations_v1", - dataset_id="monitoring_derived", - project_id="moz-fx-data-shared-prod", - owner="bewu@mozilla.cam", - email=["ascholtz@mozilla.com", "bewu@mozilla.cam"], - date_partition_parameter="submission_date", - depends_on_past=True, - ) - monitoring_derived__telemetry_missing_columns__v3 = bigquery_etl_query( task_id="monitoring_derived__telemetry_missing_columns__v3", destination_table="telemetry_missing_columns_v3", @@ -361,10 +350,6 @@ wait_for_copy_deduplicate_all ) - monitoring_derived__table_partition_expirations__v1.set_upstream( - wait_for_copy_deduplicate_all - ) - monitoring_derived__telemetry_missing_columns__v3.set_upstream( wait_for_copy_deduplicate_all ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_mozilla_vpn_site_metrics.py /tmp/workspace/generated-sql/dags/bqetl_mozilla_vpn_site_metrics.py --- /tmp/workspace/main-generated-sql/dags/bqetl_mozilla_vpn_site_metrics.py 2024-08-15 11:53:05.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_mozilla_vpn_site_metrics.py 2024-08-15 12:12:38.000000000 +0000 @@ -68,21 +68,9 @@ pool="DATA_ENG_EXTERNALTASKSENSOR", ) - wait_for_wait_for_wmo_events_table = ExternalTaskSensor( - task_id="wait_for_wait_for_wmo_events_table", - external_dag_id="bqetl_google_analytics_derived_ga4", - external_task_id="wait_for_wmo_events_table", - 
execution_delta=datetime.timedelta(seconds=10800), - check_existence=True, - mode="reschedule", - allowed_states=ALLOWED_STATES, - failed_states=FAILED_STATES, - pool="DATA_ENG_EXTERNALTASKSENSOR", - ) - - mozilla_vpn_derived__funnel_ga_to_subscriptions__v2 = bigquery_etl_query( - task_id="mozilla_vpn_derived__funnel_ga_to_subscriptions__v2", - destination_table="funnel_ga_to_subscriptions_v2", + mozilla_vpn_derived__funnel_ga_to_subscriptions__v1 = bigquery_etl_query( + task_id="mozilla_vpn_derived__funnel_ga_to_subscriptions__v1", + destination_table="funnel_ga_to_subscriptions_v1", dataset_id="mozilla_vpn_derived", project_id="moz-fx-data-shared-prod", owner="srose@mozilla.com", @@ -91,29 +79,41 @@ depends_on_past=False, ) - mozilla_vpn_derived__site_metrics_summary__v2 = bigquery_etl_query( - task_id="mozilla_vpn_derived__site_metrics_summary__v2", - destination_table="site_metrics_summary_v2", + mozilla_vpn_derived__site_metrics_empty_check__v1 = bigquery_etl_query( + task_id="mozilla_vpn_derived__site_metrics_empty_check__v1", + destination_table=None, dataset_id="mozilla_vpn_derived", project_id="moz-fx-data-shared-prod", - owner="kwindau@mozilla.com", - email=[ - "kwindau@mozilla.com", - "srose@mozilla.com", - "telemetry-alerts@mozilla.com", - ], + owner="srose@mozilla.com", + email=["srose@mozilla.com", "telemetry-alerts@mozilla.com"], date_partition_parameter="submission_date", depends_on_past=False, + parameters=["date:DATE:{{ds}}"], + sql_file_path="sql/moz-fx-data-shared-prod/mozilla_vpn_derived/site_metrics_empty_check_v1/query.sql", + retry_delay=datetime.timedelta(seconds=1800), + retries=18, + email_on_retry=False, + ) + + mozilla_vpn_derived__site_metrics_summary__v1 = bigquery_etl_query( + task_id="mozilla_vpn_derived__site_metrics_summary__v1", + destination_table="site_metrics_summary_v1", + dataset_id="mozilla_vpn_derived", + project_id="moz-fx-data-shared-prod", + owner="srose@mozilla.com", + email=["srose@mozilla.com", 
"telemetry-alerts@mozilla.com"], + date_partition_parameter="date", + depends_on_past=False, ) - mozilla_vpn_derived__funnel_ga_to_subscriptions__v2.set_upstream( + mozilla_vpn_derived__funnel_ga_to_subscriptions__v1.set_upstream( wait_for_mozilla_vpn_derived__all_subscriptions__v1 ) - mozilla_vpn_derived__funnel_ga_to_subscriptions__v2.set_upstream( - mozilla_vpn_derived__site_metrics_summary__v2 + mozilla_vpn_derived__funnel_ga_to_subscriptions__v1.set_upstream( + mozilla_vpn_derived__site_metrics_summary__v1 ) - mozilla_vpn_derived__site_metrics_summary__v2.set_upstream( - wait_for_wait_for_wmo_events_table + mozilla_vpn_derived__site_metrics_summary__v1.set_upstream( + mozilla_vpn_derived__site_metrics_empty_check__v1 ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_search_dashboard.py /tmp/workspace/generated-sql/dags/bqetl_search_dashboard.py --- /tmp/workspace/main-generated-sql/dags/bqetl_search_dashboard.py 2024-08-15 11:53:05.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_search_dashboard.py 2024-08-15 12:12:38.000000000 +0000 @@ -251,23 +251,11 @@ pool="DATA_ENG_EXTERNALTASKSENSOR", ) - wait_for_copy_deduplicate_all = ExternalTaskSensor( - task_id="wait_for_copy_deduplicate_all", - external_dag_id="copy_deduplicate", - external_task_id="copy_deduplicate_all", - execution_delta=datetime.timedelta(seconds=12600), - check_existence=True, - mode="reschedule", - allowed_states=ALLOWED_STATES, - failed_states=FAILED_STATES, - pool="DATA_ENG_EXTERNALTASKSENSOR", - ) - - wait_for_search_derived__search_clients_daily__v8 = ExternalTaskSensor( - task_id="wait_for_search_derived__search_clients_daily__v8", - external_dag_id="bqetl_search", - external_task_id="search_derived__search_clients_daily__v8", - execution_delta=datetime.timedelta(seconds=5400), + wait_for_search_derived__mobile_search_aggregates__v1 = ExternalTaskSensor( + task_id="wait_for_search_derived__mobile_search_aggregates__v1", + 
external_dag_id="bqetl_mobile_search", + external_task_id="search_derived__mobile_search_aggregates__v1", + execution_delta=datetime.timedelta(seconds=9000), check_existence=True, mode="reschedule", allowed_states=ALLOWED_STATES, @@ -323,17 +311,6 @@ depends_on_past=False, ) - search_derived__search_dau_aggregates__v1 = bigquery_etl_query( - task_id="search_derived__search_dau_aggregates__v1", - destination_table="search_dau_aggregates_v1", - dataset_id="search_derived", - project_id="moz-fx-data-shared-prod", - owner="mozilla/revenue_forecasting_data_reviewers", - email=["akomar@mozilla.com", "telemetry-alerts@mozilla.com"], - date_partition_parameter="submission_date", - depends_on_past=False, - ) - search_derived__search_revenue_levers_daily__v1 = bigquery_etl_query( task_id="search_derived__search_revenue_levers_daily__v1", destination_table="search_revenue_levers_daily_v1", @@ -357,74 +334,6 @@ wait_for_search_derived__mobile_search_clients_daily__v1 ) - search_derived__search_dau_aggregates__v1.set_upstream( - wait_for_checks__fail_org_mozilla_fenix_derived__baseline_clients_last_seen__v1 - ) - - search_derived__search_dau_aggregates__v1.set_upstream( - wait_for_checks__fail_org_mozilla_fenix_nightly_derived__baseline_clients_last_seen__v1 - ) - - search_derived__search_dau_aggregates__v1.set_upstream( - wait_for_checks__fail_org_mozilla_fennec_aurora_derived__baseline_clients_last_seen__v1 - ) - - search_derived__search_dau_aggregates__v1.set_upstream( - wait_for_checks__fail_org_mozilla_firefox_beta_derived__baseline_clients_last_seen__v1 - ) - - search_derived__search_dau_aggregates__v1.set_upstream( - wait_for_checks__fail_org_mozilla_firefox_derived__baseline_clients_last_seen__v1 - ) - - search_derived__search_dau_aggregates__v1.set_upstream( - wait_for_checks__fail_org_mozilla_focus_beta_derived__baseline_clients_last_seen__v1 - ) - - search_derived__search_dau_aggregates__v1.set_upstream( - 
wait_for_checks__fail_org_mozilla_focus_derived__baseline_clients_last_seen__v1 - ) - - search_derived__search_dau_aggregates__v1.set_upstream( - wait_for_checks__fail_org_mozilla_focus_nightly_derived__baseline_clients_last_seen__v1 - ) - - search_derived__search_dau_aggregates__v1.set_upstream( - wait_for_checks__fail_org_mozilla_ios_fennec_derived__baseline_clients_last_seen__v1 - ) - - search_derived__search_dau_aggregates__v1.set_upstream( - wait_for_checks__fail_org_mozilla_ios_firefox_derived__baseline_clients_last_seen__v1 - ) - - search_derived__search_dau_aggregates__v1.set_upstream( - wait_for_checks__fail_org_mozilla_ios_firefoxbeta_derived__baseline_clients_last_seen__v1 - ) - - search_derived__search_dau_aggregates__v1.set_upstream( - wait_for_checks__fail_org_mozilla_ios_focus_derived__baseline_clients_last_seen__v1 - ) - - search_derived__search_dau_aggregates__v1.set_upstream( - wait_for_checks__fail_org_mozilla_ios_klar_derived__baseline_clients_last_seen__v1 - ) - - search_derived__search_dau_aggregates__v1.set_upstream( - wait_for_checks__fail_org_mozilla_klar_derived__baseline_clients_last_seen__v1 - ) - - search_derived__search_dau_aggregates__v1.set_upstream( - wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 - ) - - search_derived__search_dau_aggregates__v1.set_upstream( - wait_for_copy_deduplicate_all - ) - - search_derived__search_dau_aggregates__v1.set_upstream( - wait_for_search_derived__search_clients_daily__v8 - ) - search_derived__search_revenue_levers_daily__v1.set_upstream( wait_for_checks__fail_org_mozilla_fenix_derived__baseline_clients_last_seen__v1 ) @@ -482,13 +391,13 @@ ) search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_copy_deduplicate_all + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 ) search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_search_derived__mobile_search_clients_daily__v1 + wait_for_search_derived__mobile_search_aggregates__v1 ) 
search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_search_derived__search_clients_daily__v8 + wait_for_search_derived__search_aggregates__v8 ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_search.py /tmp/workspace/generated-sql/dags/bqetl_search.py --- /tmp/workspace/main-generated-sql/dags/bqetl_search.py 2024-08-15 11:53:05.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_search.py 2024-08-15 12:12:38.000000000 +0000 @@ -76,10 +76,11 @@ pool="DATA_ENG_EXTERNALTASKSENSOR", ) - wait_for_clients_first_seen_v3 = ExternalTaskSensor( - task_id="wait_for_clients_first_seen_v3", + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 = ( + ExternalTaskSensor( + task_id="wait_for_checks__fail_telemetry_derived__clients_first_seen__v2", external_dag_id="bqetl_analytics_tables", - external_task_id="clients_first_seen_v3", + external_task_id="checks__fail_telemetry_derived__clients_first_seen__v2", execution_delta=datetime.timedelta(seconds=3600), check_existence=True, mode="reschedule", @@ -87,6 +88,7 @@ failed_states=FAILED_STATES, pool="DATA_ENG_EXTERNALTASKSENSOR", ) + ) search_derived__search_aggregates__v8 = bigquery_etl_query( task_id="search_derived__search_aggregates__v8", @@ -165,13 +167,6 @@ ) ExternalTaskMarker( - task_id="bqetl_search_dashboard__wait_for_search_derived__search_clients_daily__v8", - external_dag_id="bqetl_search_dashboard", - external_task_id="wait_for_search_derived__search_clients_daily__v8", - execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=81000)).isoformat() }}", - ) - - ExternalTaskMarker( task_id="bqetl_addons__wait_for_search_derived__search_clients_daily__v8", external_dag_id="bqetl_addons", external_task_id="wait_for_search_derived__search_clients_daily__v8", @@ -298,7 +293,7 @@ ) search_derived__search_clients_last_seen__v2.set_upstream( - wait_for_clients_first_seen_v3 + wait_for_checks__fail_telemetry_derived__clients_first_seen__v2 
) search_derived__search_clients_last_seen__v2.set_upstream( Only in /tmp/workspace/generated-sql/sql/moz-fx-data-marketing-prod: google_search_console Only in /tmp/workspace/generated-sql/sql/moz-fx-data-marketing-prod: google_search_console_derived Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/ads: ppa_measurements_limited Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix: new_profile_clients Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix: new_profiles Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix_derived: new_profiles_v1 Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_ios: new_profile_clients Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_ios: new_profiles Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_ios_derived: new_profiles_v1 Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/focus_android: new_profile_clients Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/focus_android: new_profiles Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/focus_android_derived: new_profiles_v1 Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/focus_ios: new_profile_clients Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/focus_ios: new_profiles Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/focus_ios_derived: new_profiles_v1 Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fxci: task_run_costs Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fxci_derived: task_run_costs_v1 Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console: limited_historical_search_impressions_by_page Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/google_search_console: 
limited_historical_search_impressions_by_site Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/klar_android: new_profile_clients Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/klar_android: new_profiles Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/klar_android_derived: new_profiles_v1 Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/klar_ios: new_profile_clients Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/klar_ios: new_profiles Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/klar_ios_derived: new_profiles_v1 Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitoring_derived: table_partition_expirations_v1 Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived: blogs_daily_summary_v2 Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived: blogs_goals_v2 Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived: blogs_landing_page_summary_v2 Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived: blogs_sessions_v2 Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived: firefox_whatsnew_summary_v2 Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived: www_site_events_metrics_v2 Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived: www_site_hits_v2 Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived: www_site_landing_page_metrics_v2 Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived: www_site_metrics_summary_v2 Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_org_derived: www_site_page_metrics_v2 Only in 
/tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/mozilla_vpn: site_metrics_summary_v2 Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/mozilla_vpn_derived: funnel_ga_to_subscriptions_v2 Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search: search_dau_aggregates Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_derived: search_dau_aggregates_v1 Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry: mobile_new_profile_clients Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry: mobile_new_profiles Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived: newtab_merino_extract_v1 diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/mozfun/google_search_console/classify_site_query/udf.sql /tmp/workspace/generated-sql/sql/mozfun/google_search_console/classify_site_query/udf.sql --- /tmp/workspace/main-generated-sql/sql/mozfun/google_search_console/classify_site_query/udf.sql 2024-08-15 11:48:26.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/mozfun/google_search_console/classify_site_query/udf.sql 2024-08-15 11:49:13.000000000 +0000 @@ -15,16 +15,14 @@ ARRAY_TO_STRING( [ r'.i.e.ox', - r'\besr\b', r'\bf..ef[ioa]+', r'\bf[aie]+re?\s?[fbv]', r'\bf[ier]+\s?[fv][oei]?[xkc]', - r'\bff\b', r'\bfi[aeiobcfkrvx]+x', r'\bfirf', r'f.r.f.x', r'faiya-fokkusu', -- fire fox (Japanese) - r'fox', + r'foxfire', r'huohu', -- fire fox (Chinese) r'nightly', r'quantum', @@ -39,12 +37,13 @@ r'כןרקכםס', -- yes you have been (Hebrew) r'פיירפוקס', -- firefox (Hebrew) r'فاجا بوكس', -- `faja buks` (Arabic) + r'فاير فوكس', -- fire fox (Arabic) + r'فايرفوكس', -- firefox (Arabic) r'فایر فاکس', -- fire fox (Arabic) r'فایرفاکس', -- firefox (Arabic) r'فرفاكس', -- `firfaks` (Arabic) r'فري فاكس', -- `fri faks` (Arabic) r'فكس', -- `fiks` (Arabic) - r'فوكس', -- fox (Arabic) r'فياير', -- `fayayar` (Arabic) r'فير', -- 
`fir` (Arabic) r'फायर फॉक्स', -- fire fox (Indic) @@ -66,9 +65,9 @@ r'ไฟลฟอก', -- `fil fxk` (Thai) r'ไฟล์ฟอก', -- `fil fxk` (Thai) r'หมาไฟ', -- fire dog (Thai) + r'파워폭스', -- powerfox (Korean) r'파이어', -- fire (Korean) r'파폭', -- `papog` (Korean) - r'폭스', -- fox (Korean) r'ふぁいあ', -- `faia` (Japanese) r'ファイア', -- fire (Japanese) r'ふあいあーふぉっくす', -- `faia fokkusu` (Japanese) @@ -107,13 +106,10 @@ query, ARRAY_TO_STRING( [ - r'\bmpl\b', r'm o z i l l a', r'm.zil', r'm\w*zilla', r'mizolla', - r'mo[dnr]?zil+a', - r'mo[jsx]il+a', r'moz://a', r'moz:lla', r'moz+\w*l', diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-cjms-nonprod-9a36/cjms_bigquery/flows_live/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-cjms-nonprod-9a36/cjms_bigquery/flows_live/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-cjms-nonprod-9a36/cjms_bigquery/flows_live/metadata.yaml 2024-08-15 11:49:01.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-cjms-nonprod-9a36/cjms_bigquery/flows_live/metadata.yaml 2024-08-15 12:06:14.000000000 +0000 @@ -2,6 +2,6 @@ # Generated by bigquery_etl.dependency references: view.sql: - - moz-fx-fxa-nonprod-375e.fxa_stage_logs.stdout + - moz-fx-fxa-nonprod-375e.fxa_stage_logs.stdout_20* - moz-fx-fxa-nonprod.gke_fxa_stage_log.stderr - moz-fx-fxa-nonprod.gke_fxa_stage_log.stdout diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-cjms-nonprod-9a36/cjms_bigquery/flows_live/view.sql /tmp/workspace/generated-sql/sql/moz-fx-cjms-nonprod-9a36/cjms_bigquery/flows_live/view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-cjms-nonprod-9a36/cjms_bigquery/flows_live/view.sql 2024-08-15 11:48:26.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-cjms-nonprod-9a36/cjms_bigquery/flows_live/view.sql 2024-08-15 11:49:13.000000000 +0000 @@ -28,12 +28,12 @@ AND jsonPayload.fields.event_type IS NOT NULL UNION ALL SELECT - DATE(`timestamp`) AS submission_date, + PARSE_DATE('%y%m%d', 
_TABLE_SUFFIX) AS submission_date, JSON_VALUE(jsonPayload.fields.user_properties, '$.flow_id') AS flow_id, `timestamp`, TO_HEX(SHA256(jsonPayload.fields.user_id)) AS fxa_uid, FROM - `moz-fx-fxa-nonprod-375e.fxa_stage_logs.stdout` + `moz-fx-fxa-nonprod-375e.fxa_stage_logs.stdout_20*` WHERE jsonPayload.type = 'amplitudeEvent' AND jsonPayload.fields.event_type IS NOT NULL diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-marketing-prod/ga_derived/blogs_sessions_v2/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-marketing-prod/ga_derived/blogs_sessions_v2/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-marketing-prod/ga_derived/blogs_sessions_v2/metadata.yaml 2024-08-15 11:50:04.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-marketing-prod/ga_derived/blogs_sessions_v2/metadata.yaml 2024-08-15 12:07:16.000000000 +0000 @@ -9,9 +9,13 @@ dag: bqetl_google_analytics_derived_ga4 scheduling: dag_name: bqetl_google_analytics_derived_ga4 - depends_on: + depends_on_tables_existing: - task_id: wait_for_blogs_events_table - dag_name: bqetl_google_analytics_derived_ga4 + table_id: moz-fx-data-marketing-prod.analytics_314399816.events_{{ ds_nodash }} + poke_interval: 30m + timeout: 10h + retries: 1 + retry_delay: 30m bigquery: time_partitioning: type: day diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-marketing-prod/ga_derived/www_site_metrics_summary_v2/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-marketing-prod/ga_derived/www_site_metrics_summary_v2/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-marketing-prod/ga_derived/www_site_metrics_summary_v2/metadata.yaml 2024-08-15 11:50:04.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-marketing-prod/ga_derived/www_site_metrics_summary_v2/metadata.yaml 2024-08-15 12:07:16.000000000 +0000 @@ -9,9 +9,13 @@ dag: bqetl_google_analytics_derived_ga4 scheduling: 
dag_name: bqetl_google_analytics_derived_ga4 - depends_on: + depends_on_tables_existing: - task_id: wait_for_wmo_events_table - dag_name: bqetl_google_analytics_derived_ga4 + table_id: moz-fx-data-marketing-prod.analytics_313696158.events_{{ ds_nodash }} + poke_interval: 30m + timeout: 10h + retries: 1 + retry_delay: 30m bigquery: time_partitioning: type: day diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-marketing-prod/google_search_console/search_impressions_by_page/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-marketing-prod/google_search_console/search_impressions_by_page/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-marketing-prod/google_search_console/search_impressions_by_page/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-marketing-prod/google_search_console/search_impressions_by_page/metadata.yaml 2024-08-15 12:06:15.000000000 +0000 @@ -0,0 +1,13 @@ +friendly_name: Search Impressions By Page (DEPRECATED) +description: |- + This view is being moved to `mozdata.google_search_console.search_impressions_by_page`. + Please update any queries referencing this view to point to the new location. 
+owners: +- srose@mozilla.com +labels: + owner1: srose + +# Generated by bigquery_etl.dependency +references: + view.sql: + - moz-fx-data-shared-prod.google_search_console.search_impressions_by_page diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-marketing-prod/google_search_console/search_impressions_by_page/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-marketing-prod/google_search_console/search_impressions_by_page/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-marketing-prod/google_search_console/search_impressions_by_page/schema.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-marketing-prod/google_search_console/search_impressions_by_page/schema.yaml 2024-08-15 11:49:13.000000000 +0000 @@ -0,0 +1,143 @@ +fields: +- name: date + type: DATE + mode: NULLABLE + description: The day on which the search occurred (Pacific Time). +- name: site_url + type: STRING + mode: NULLABLE + description: |- + For domain properties, this will be `sc-domain:` followed by the domain name. + For URL-prefix properties, it will be the full URL of the property definition. +- name: site_domain_name + type: STRING + mode: NULLABLE + description: Domain name of the site. +- name: page_url + type: STRING + mode: NULLABLE + description: |- + The final page URL linked by a search result after any skip redirects. + This will be null for anonymized Discover impressions. +- name: p ```

⚠️ Only part of the diff is displayed.

Link to full diff

dataops-ci-bot commented 1 month ago

Integration report for "Merge branch 'main' into search_levers_daily_from_aggregates"

sql.diff

Click to expand! ```diff diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_mobile_search.py /tmp/workspace/generated-sql/dags/bqetl_mobile_search.py --- /tmp/workspace/main-generated-sql/dags/bqetl_mobile_search.py 2024-08-21 12:07:43.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_mobile_search.py 2024-08-21 12:08:41.000000000 +0000 @@ -80,6 +80,20 @@ depends_on_past=False, ) + with TaskGroup( + "search_derived__mobile_search_aggregates__v1_external", + ) as search_derived__mobile_search_aggregates__v1_external: + ExternalTaskMarker( + task_id="bqetl_search_dashboard__wait_for_search_derived__mobile_search_aggregates__v1", + external_dag_id="bqetl_search_dashboard", + external_task_id="wait_for_search_derived__mobile_search_aggregates__v1", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=77400)).isoformat() }}", + ) + + search_derived__mobile_search_aggregates__v1_external.set_upstream( + search_derived__mobile_search_aggregates__v1 + ) + search_derived__mobile_search_clients_daily__v1 = bigquery_etl_query( task_id="search_derived__mobile_search_clients_daily__v1", destination_table="mobile_search_clients_daily_v1", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_search_dashboard.py /tmp/workspace/generated-sql/dags/bqetl_search_dashboard.py --- /tmp/workspace/main-generated-sql/dags/bqetl_search_dashboard.py 2024-08-21 12:07:43.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_search_dashboard.py 2024-08-21 12:08:42.000000000 +0000 @@ -275,6 +275,18 @@ pool="DATA_ENG_EXTERNALTASKSENSOR", ) + wait_for_search_derived__mobile_search_aggregates__v1 = ExternalTaskSensor( + task_id="wait_for_search_derived__mobile_search_aggregates__v1", + external_dag_id="bqetl_mobile_search", + external_task_id="search_derived__mobile_search_aggregates__v1", + execution_delta=datetime.timedelta(seconds=9000), + check_existence=True, + mode="reschedule", + 
allowed_states=ALLOWED_STATES, + failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + search_derived__desktop_search_aggregates_by_userstate__v1 = bigquery_etl_query( task_id="search_derived__desktop_search_aggregates_by_userstate__v1", destination_table="desktop_search_aggregates_by_userstate_v1", @@ -482,13 +494,17 @@ ) search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_copy_deduplicate_all + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 ) search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_search_derived__mobile_search_clients_daily__v1 + wait_for_search_derived__mobile_search_aggregates__v1 ) search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_search_derived__search_clients_daily__v8 + wait_for_search_derived__search_aggregates__v8 + ) + + search_derived__search_revenue_levers_daily__v1.set_upstream( + search_derived__search_dau_aggregates__v1 ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql 2024-08-21 12:02:34.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql 2024-08-21 12:02:42.000000000 +0000 @@ -1,276 +1,75 @@ -WITH --- Google Desktop (search + DAU) -desktop_data_google AS ( +WITH combined_search_data AS ( SELECT submission_date, - IF(LOWER(channel) LIKE '%esr%', 'ESR', 'personal') AS channel, country, - COUNT(DISTINCT client_id) AS dau, - COUNT( - DISTINCT IF(default_search_engine LIKE '%google%', client_id, NULL) - ) AS dau_w_engine_as_default, - COUNT( - DISTINCT IF( - sap > 0 - AND normalized_engine = 'Google' - AND 
default_search_engine LIKE '%google%', - client_id, - NULL - ) - ) AS dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Google', sap, 0)) AS sap, - SUM(IF(normalized_engine = 'Google', tagged_sap, 0)) AS tagged_sap, - SUM(IF(normalized_engine = 'Google', tagged_follow_on, 0)) AS tagged_follow_on, - SUM(IF(normalized_engine = 'Google', search_with_ads, 0)) AS search_with_ads, - SUM(IF(normalized_engine = 'Google', ad_click, 0)) AS ad_click, - SUM(IF(normalized_engine = 'Google', organic, 0)) AS organic, - SUM(IF(normalized_engine = 'Google', ad_click_organic, 0)) AS ad_click_organic, - SUM(IF(normalized_engine = 'Google', search_with_ads_organic, 0)) AS search_with_ads_organic, - SUM(IF(normalized_engine = 'Google' AND is_sap_monetizable, sap, 0)) AS monetizable_sap + normalized_engine AS partner, + 'desktop' AS device, + sap, + tagged_sap, + tagged_follow_on, + search_with_ads, + ad_click, + organic, + ad_click_organic, + search_with_ads_organic, + IF(is_sap_monetizable, sap, 0) AS monetizable_sap FROM - `moz-fx-data-shared-prod.search.search_clients_engines_sources_daily` + `moz-fx-data-shared-prod.search.search_aggregates` WHERE submission_date = @submission_date - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ) - GROUP BY - submission_date, - channel, - country - ORDER BY - submission_date, - channel, - country -), --- Bing Desktop (non-Acer) -desktop_data_bing AS ( + UNION ALL SELECT submission_date, country, - COUNT(DISTINCT client_id) AS dau, - COUNT( - DISTINCT IF(default_search_engine LIKE '%bing%', client_id, NULL) - ) AS dau_w_engine_as_default, - COUNT( - DISTINCT IF( - sap > 0 - AND normalized_engine = 'Bing' - AND default_search_engine LIKE '%bing%', - client_id, - NULL - ) - ) AS dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Bing', sap, 0)) AS sap, - SUM(IF(normalized_engine = 'Bing', tagged_sap, 0)) AS tagged_sap, 
- SUM(IF(normalized_engine = 'Bing', tagged_follow_on, 0)) AS tagged_follow_on, - SUM(IF(normalized_engine = 'Bing', search_with_ads, 0)) AS search_with_ads, - SUM(IF(normalized_engine = 'Bing', ad_click, 0)) AS ad_click, - SUM(IF(normalized_engine = 'Bing', organic, 0)) AS organic, - SUM(IF(normalized_engine = 'Bing', ad_click_organic, 0)) AS ad_click_organic, - SUM(IF(normalized_engine = 'Bing', search_with_ads_organic, 0)) AS search_with_ads_organic, - SUM(IF(normalized_engine = 'Bing' AND is_sap_monetizable, sap, 0)) AS monetizable_sap + normalized_engine AS partner, + 'mobile' AS device, + sap, + tagged_sap, + tagged_follow_on, + search_with_ads, + ad_click, + organic, + ad_click_organic, + 0 AS search_with_ads_organic, + 0 AS monetizable_sap FROM - `moz-fx-data-shared-prod.search.search_clients_engines_sources_daily` + `moz-fx-data-shared-prod.search.mobile_search_aggregates` WHERE submission_date = @submission_date - AND (distribution_id IS NULL OR distribution_id NOT LIKE '%acer%') - AND client_id NOT IN (SELECT client_id FROM `moz-fx-data-shared-prod.search.acer_cohort`) - GROUP BY - submission_date, - country - ORDER BY - submission_date, - country ), --- DDG Desktop + Extension -desktop_data_ddg AS ( - SELECT +eligible_markets_dau AS ( + SELECT DISTINCT + "desktop" AS device, submission_date, country, - COUNT(DISTINCT client_id) AS dau, + COUNT(DISTINCT client_id) AS global_eligible_dau, COUNT( DISTINCT IF( ( - (default_search_engine LIKE('%ddg%') OR default_search_engine LIKE('%duckduckgo%')) - AND NOT default_search_engine LIKE('%addon%') - ), - client_id, - NULL - ) - ) AS ddg_dau_w_engine_as_default, - COUNT( - DISTINCT IF( - (engine) IN ('ddg', 'duckduckgo') - AND sap > 0 - AND ( - (default_search_engine LIKE('%ddg%') OR default_search_engine LIKE('%duckduckgo%')) - AND NOT default_search_engine LIKE('%addon%') + (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) + OR (submission_date >= "2023-12-01" AND country 
NOT IN ('RU', 'UA', 'BY', 'CN')) ), client_id, NULL ) - ) AS ddg_dau_engaged_w_sap, - SUM(IF(engine IN ('ddg', 'duckduckgo'), sap, 0)) AS ddg_sap, - SUM(IF(engine IN ('ddg', 'duckduckgo'), tagged_sap, 0)) AS ddg_tagged_sap, - SUM(IF(engine IN ('ddg', 'duckduckgo'), tagged_follow_on, 0)) AS ddg_tagged_follow_on, - SUM(IF(engine IN ('ddg', 'duckduckgo'), search_with_ads, 0)) AS ddg_search_with_ads, - SUM(IF(engine IN ('ddg', 'duckduckgo'), ad_click, 0)) AS ddg_ad_click, - SUM(IF(engine IN ('ddg', 'duckduckgo'), organic, 0)) AS ddg_organic, - SUM(IF(engine IN ('ddg', 'duckduckgo'), ad_click_organic, 0)) AS ddg_ad_click_organic, - SUM( - IF(engine IN ('ddg', 'duckduckgo'), search_with_ads_organic, 0) - ) AS ddg_search_with_ads_organic, - SUM(IF(engine IN ('ddg', 'duckduckgo') AND is_sap_monetizable, sap, 0)) AS ddg_monetizable_sap, - -- in-content probes not available for addon so these metrics although being here will be zero - COUNT( - DISTINCT IF(default_search_engine LIKE('ddg%addon'), client_id, NULL) - ) AS ddgaddon_dau_w_engine_as_default, - COUNT( - DISTINCT IF( - engine = 'ddg-addon' - AND sap > 0 - AND default_search_engine LIKE('ddg%addon'), - client_id, - NULL - ) - ) AS ddgaddon_dau_engaged_w_sap, - SUM(IF(engine IN ('ddg-addon'), sap, 0)) AS ddgaddon_sap, - SUM(IF(engine IN ('ddg-addon'), tagged_sap, 0)) AS ddgaddon_tagged_sap, - SUM(IF(engine IN ('ddg-addon'), tagged_follow_on, 0)) AS ddgaddon_tagged_follow_on, - SUM(IF(engine IN ('ddg-addon'), search_with_ads, 0)) AS ddgaddon_search_with_ads, - SUM(IF(engine IN ('ddg-addon'), ad_click, 0)) AS ddgaddon_ad_click, - SUM(IF(engine IN ('ddg-addon'), organic, 0)) AS ddgaddon_organic, - SUM(IF(engine IN ('ddg-addon'), ad_click_organic, 0)) AS ddgaddon_ad_click_organic, - SUM( - IF(engine IN ('ddg-addon'), search_with_ads_organic, 0) - ) AS ddgaddon_search_with_ads_organic, - SUM(IF(engine IN ('ddg-addon') AND is_sap_monetizable, sap, 0)) AS ddgaddon_monetizable_sap + ) AS google_eligible_dau FROM - 
`moz-fx-data-shared-prod.search.search_clients_engines_sources_daily` + `mozdata.telemetry.desktop_active_users` WHERE submission_date = @submission_date + AND is_dau + # not including Mozilla Online + AND app_name = "Firefox Desktop" GROUP BY - submission_date, - country - ORDER BY - submission_date, - country -), --- Mobile DAU data -- merging baseline clients to AUA clients -## baseline ping -- mobile default search engine by client id -mobile_baseline_engine AS ( - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine_code AS default_search_engine, - FROM - `moz-fx-data-shared-prod.fenix.baseline` - WHERE - DATE(submission_timestamp) = @submission_date - UNION ALL - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine - FROM - `moz-fx-data-shared-prod.firefox_ios.baseline` - WHERE - DATE(submission_timestamp) = @submission_date - UNION ALL - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.browser_default_search_engine AS default_search_engine - FROM - `moz-fx-data-shared-prod.focus_android.baseline` - WHERE - DATE(submission_timestamp) = @submission_date - UNION ALL - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine - FROM - `moz-fx-data-shared-prod.focus_ios.baseline` - WHERE - DATE(submission_timestamp) = @submission_date -), -## baseline ping search counts -- mobile search counts by client id -mobile_baseline_search AS ( - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine_code AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - 
`moz-fx-data-shared-prod.fenix.baseline`, - UNNEST(metrics.labeled_counter.metrics_search_count) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 + 1, + 2, + 3 UNION ALL - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - `moz-fx-data-shared-prod.firefox_ios.baseline`, - UNNEST(metrics.labeled_counter.search_counts) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 - UNION ALL - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.browser_default_search_engine AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - `moz-fx-data-shared-prod.focus_android.baseline`, - UNNEST(metrics.labeled_counter.browser_search_search_count) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 - UNION ALL - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - `moz-fx-data-shared-prod.focus_ios.baseline`, - UNNEST(metrics.labeled_counter.browser_search_search_count) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 -), -## baseline-powered clients who qualify for KPI (activity filters applied) -mobile_dau_data AS ( SELECT DISTINCT - submission_date, "mobile" AS device, - country, - client_id - FROM - `mozdata.telemetry.mobile_active_users` - WHERE - submission_date = @submission_date - 
AND is_dau - # not including Fenix MozillaOnline, BrowserStack, Klar - AND app_name IN ("Focus iOS", "Firefox iOS", "Fenix", "Focus Android") -), -final_mobile_dau_counts AS ( - SELECT submission_date, country, - COUNT(DISTINCT client_id) AS eligible_dau, + COUNT(DISTINCT client_id) AS global_eligible_dau, COUNT( DISTINCT IF( ( @@ -280,316 +79,79 @@ client_id, NULL ) - ) AS google_eligible_dau, - COUNT( - DISTINCT IF( - default_search_engine LIKE '%google%' - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ), - client_id, - NULL - ) - ) AS google_dau_w_engine_as_default, - COUNT( - DISTINCT IF( - search_count > 0 - AND normalized_engine = 'Google' - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ), - client_id, - NULL - ) - ) AS google_dau_engaged_w_sap, - COUNT( - DISTINCT IF(default_search_engine LIKE '%bing%', client_id, NULL) - ) AS bing_dau_w_engine_as_default, - COUNT( - DISTINCT IF(search_count > 0 AND normalized_engine = 'Bing', client_id, NULL) - ) AS bing_dau_engaged_w_sap, - COUNT( - DISTINCT IF( - default_search_engine LIKE('%ddg%') - OR default_search_engine LIKE('%duckduckgo%'), - client_id, - NULL - ) - ) AS ddg_dau_w_engine_as_default, - COUNT( - DISTINCT IF(normalized_engine = "DuckDuckGo" AND search_count > 0, client_id, NULL) - ) AS ddg_dau_engaged_w_sap + ) AS google_eligible_dau FROM - mobile_dau_data - LEFT JOIN - mobile_baseline_engine - USING (submission_date, client_id) - LEFT JOIN - mobile_baseline_search - USING (submission_date, client_id, default_search_engine) + `mozdata.telemetry.mobile_active_users` + WHERE + submission_date = @submission_date + AND is_dau + # not including Fenix MozillaOnline, BrowserStack, Klar + AND app_name IN ("Focus iOS", "Firefox iOS", "Fenix", 
"Focus Android") GROUP BY - submission_date, - country + 1, + 2, + 3 ), --- Google Mobile (search only - as mobile search metrics is based on metrics --- ping, while DAU should be based on main ping on Mobile, see also --- https://mozilla-hub.atlassian.net/browse/RS-575) -mobile_data_google AS ( +desktop_mobile_search_dau AS ( SELECT submission_date, + partner, + device, country, - google_eligible_dau, - google_dau_w_engine_as_default, - google_dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Google', sap, 0)) AS sap, - SUM(IF(normalized_engine = 'Google', tagged_sap, 0)) AS tagged_sap, - SUM(IF(normalized_engine = 'Google', tagged_follow_on, 0)) AS tagged_follow_on, - SUM(IF(normalized_engine = 'Google', search_with_ads, 0)) AS search_with_ads, - SUM(IF(normalized_engine = 'Google', ad_click, 0)) AS ad_click, - SUM(IF(normalized_engine = 'Google', organic, 0)) AS organic, - SUM(IF(normalized_engine = 'Google', ad_click_organic, 0)) AS ad_click_organic, - -- metrics do not exist for mobile - 0 AS search_with_ads_organic, - 0 AS monetizable_sap + SUM(dau_w_engine_as_default) AS dau_w_engine_as_default, + SUM(dau_engaged_w_sap) AS dau_engaged_w_sap FROM - `moz-fx-data-shared-prod.search.mobile_search_clients_engines_sources_daily` - INNER JOIN - final_mobile_dau_counts - USING (submission_date, country) + `mozdata.search.search_dau_aggregates` WHERE submission_date = @submission_date - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ) - AND ( - app_name IN ('Fenix', 'Firefox Preview', 'Focus', 'Focus Android Glean', 'Focus iOS Glean') - OR (app_name = 'Fennec' AND os = 'iOS') - ) GROUP BY - submission_date, - country, - google_eligible_dau, - google_dau_w_engine_as_default, - google_dau_engaged_w_sap - ORDER BY - submission_date, - country + 1, + 2, + 3, + 4 ), --- Bing & DDG Mobile (search only - as mobile search metrics is based on 
--- metrics ping, while DAU should be based on main ping on Mobile, see also --- https://mozilla-hub.atlassian.net/browse/RS-575) -mobile_data_bing_ddg AS ( +combined_search_dau AS ( SELECT submission_date, + partner, + device, country, - eligible_dau, - bing_dau_w_engine_as_default, - bing_dau_engaged_w_sap, - ddg_dau_w_engine_as_default, - ddg_dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Bing', sap, 0)) AS bing_sap, - SUM(IF(normalized_engine = 'Bing', tagged_sap, 0)) AS bing_tagged_sap, - SUM(IF(normalized_engine = 'Bing', tagged_follow_on, 0)) AS bing_tagged_follow_on, - SUM(IF(normalized_engine = 'Bing', search_with_ads, 0)) AS bing_search_with_ads, - SUM(IF(normalized_engine = 'Bing', ad_click, 0)) AS bing_ad_click, - SUM(IF(normalized_engine = 'Bing', organic, 0)) AS bing_organic, - SUM(IF(normalized_engine = 'Bing', ad_click_organic, 0)) AS bing_ad_click_organic, - -- metrics do not exist for mobile - 0 AS bing_search_with_ads_organic, - 0 AS bing_monetizable_sap, - SUM(IF(normalized_engine = 'DuckDuckGo', sap, 0)) AS ddg_sap, - SUM(IF(normalized_engine = 'DuckDuckGo', tagged_sap, 0)) AS ddg_tagged_sap, - SUM(IF(normalized_engine = 'DuckDuckGo', tagged_follow_on, 0)) AS ddg_tagged_follow_on, - SUM(IF(normalized_engine = 'DuckDuckGo', search_with_ads, 0)) AS ddg_search_with_ads, - SUM(IF(normalized_engine = 'DuckDuckGo', ad_click, 0)) AS ddg_ad_click, - SUM(IF(normalized_engine = 'DuckDuckGo', organic, 0)) AS ddg_organic, - SUM(IF(normalized_engine = 'DuckDuckGo', ad_click_organic, 0)) AS ddg_ad_click_organic, - -- metrics do not exist for mobile - 0 AS ddg_search_with_ads_organic, - 0 AS ddg_monetizable_sap + CASE + WHEN partner = "Google" + THEN google_eligible_dau + ELSE global_eligible_dau + END AS dau_eligible_markets, + dau_w_engine_as_default, + dau_engaged_w_sap FROM - `moz-fx-data-shared-prod.search.mobile_search_clients_engines_sources_daily` - INNER JOIN - final_mobile_dau_counts - USING (submission_date, country) - WHERE - submission_date = 
@submission_date - AND ( - app_name IN ('Fenix', 'Firefox Preview', 'Focus', 'Focus Android Glean', 'Focus iOS Glean') - OR (app_name = 'Fennec' AND os = 'iOS') - ) - GROUP BY - submission_date, - country, - eligible_dau, - bing_dau_w_engine_as_default, - bing_dau_engaged_w_sap, - ddg_dau_w_engine_as_default, - ddg_dau_engaged_w_sap - ORDER BY - submission_date, - country + desktop_mobile_search_dau + LEFT JOIN + eligible_markets_dau + USING (submission_date, device, country) ) --- combine all desktop and mobile together -SELECT - submission_date, - 'Google' AS partner, - 'desktop' AS device, - channel, - country, - dau, - dau_engaged_w_sap, - sap, - tagged_sap, - tagged_follow_on, - search_with_ads, - ad_click, - organic, - ad_click_organic, - search_with_ads_organic, - monetizable_sap, - dau_w_engine_as_default -FROM - desktop_data_google -UNION ALL -SELECT - submission_date, - 'Bing' AS partner, - 'desktop' AS device, - NULL AS channel, - country, - dau, - dau_engaged_w_sap, - sap, - tagged_sap, - tagged_follow_on, - search_with_ads, - ad_click, - organic, - ad_click_organic, - search_with_ads_organic, - monetizable_sap, - dau_w_engine_as_default -FROM - desktop_data_bing -UNION ALL -SELECT - submission_date, - 'DuckDuckGo' AS partner, - 'desktop' AS device, - NULL AS channel, - country, - dau, - ddg_dau_engaged_w_sap AS dau_engaged_w_sap, - ddg_sap AS sap, - ddg_tagged_sap AS tagged_sap, - ddg_tagged_follow_on AS tagged_follow_on, - ddg_search_with_ads AS search_with_ads, - ddg_ad_click AS ad_click, - ddg_organic AS organic, - ddg_ad_click_organic AS ad_click_organic, - ddg_search_with_ads_organic AS search_with_ads_organic, - ddg_monetizable_sap AS monetizable_sap, - ddg_dau_w_engine_as_default AS dau_w_engine_as_default -FROM - desktop_data_ddg -UNION ALL -SELECT - submission_date, - 'DuckDuckGo' AS partner, - 'extension' AS device, - NULL AS channel, - country, - dau, - ddgaddon_dau_engaged_w_sap AS dau_engaged_w_sap, - ddgaddon_sap AS sap, - 
ddgaddon_tagged_sap AS tagged_sap, - ddgaddon_tagged_follow_on AS tagged_follow_on, - ddgaddon_search_with_ads AS search_with_ads, - ddgaddon_ad_click AS ad_click, - ddgaddon_organic AS organic, - ddgaddon_ad_click_organic AS ad_click_organic, - ddgaddon_search_with_ads_organic AS search_with_ads_organic, - ddgaddon_monetizable_sap AS monetizable_sap, - ddgaddon_dau_w_engine_as_default AS dau_w_engine_as_default -FROM - desktop_data_ddg -UNION ALL SELECT - submission_date, - 'Google' AS partner, - 'mobile' AS device, - 'n/a' AS channel, - country, - google_eligible_dau AS dau, - google_dau_engaged_w_sap AS dau_engaged_w_sap, - sap, - tagged_sap, - tagged_follow_on, - search_with_ads, - ad_click, - organic, - ad_click_organic, - search_with_ads_organic, - monetizable_sap, - # custom engine bug merged in v121 - # null engine bug merged in v126 - # remove default engine data prior to June 2024 - IF( - submission_date >= "2024-06-01", - google_dau_w_engine_as_default, - NULL - ) AS dau_w_engine_as_default -FROM - mobile_data_google -UNION ALL -SELECT - submission_date, - 'Bing' AS partner, - 'mobile' AS device, + cd.submission_date, + cd.partner, + cd.device, NULL AS channel, - country, - eligible_dau, - bing_dau_engaged_w_sap, - bing_sap, - bing_tagged_sap, - bing_tagged_follow_on, - bing_search_with_ads, - bing_ad_click, - bing_organic, - bing_ad_click_organic, - bing_search_with_ads_organic, - bing_monetizable_sap, - # custom engine bug merged in v121 - # null engine bug merged in v126 - # remove default engine data prior to June 2024 - IF(submission_date >= "2024-06-01", bing_dau_w_engine_as_default, NULL) AS dau_w_engine_as_default -FROM - mobile_data_bing_ddg -UNION ALL -SELECT - submission_date, - 'DuckDuckGo' AS partner, - 'mobile' AS device, - NULL AS channel, - country, - eligible_dau, - ddg_dau_engaged_w_sap, - ddg_sap, - ddg_tagged_sap, - ddg_tagged_follow_on, - ddg_search_with_ads, - ddg_ad_click, - ddg_organic, - ddg_ad_click_organic, - 
ddg_search_with_ads_organic, - ddg_monetizable_sap, - # custom engine bug merged in v121 - # null engine bug merged in v126 - # remove default engine data prior to June 2024 - IF(submission_date >= "2024-06-01", ddg_dau_w_engine_as_default, NULL) AS dau_w_engine_as_default -FROM - mobile_data_bing_ddg + cd.country, + du.dau_eligible_markets AS dau, + du.dau_w_engine_as_default, + du.dau_engaged_w_sap, + cd.sap, + cd.tagged_sap, + cd.tagged_follow_on, + cd.search_with_ads, + cd.ad_click, + cd.organic, + cd.ad_click_organic, + cd.search_with_ads_organic, + cd.monetizable_sap +FROM + combined_search_data cd +LEFT JOIN + combined_search_dau du + ON cd.partner = du.partner + AND cd.submission_date = du.submission_date + AND cd.country = du.country + AND cd.device = du.device; ```

Link to full diff

dataops-ci-bot commented 1 month ago

Integration report for "Merge branch 'main' into search_levers_daily_from_aggregates"

sql.diff

Click to expand! ```diff diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_mobile_search.py /tmp/workspace/generated-sql/dags/bqetl_mobile_search.py --- /tmp/workspace/main-generated-sql/dags/bqetl_mobile_search.py 2024-08-21 15:00:56.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_mobile_search.py 2024-08-21 15:02:31.000000000 +0000 @@ -80,6 +80,20 @@ depends_on_past=False, ) + with TaskGroup( + "search_derived__mobile_search_aggregates__v1_external", + ) as search_derived__mobile_search_aggregates__v1_external: + ExternalTaskMarker( + task_id="bqetl_search_dashboard__wait_for_search_derived__mobile_search_aggregates__v1", + external_dag_id="bqetl_search_dashboard", + external_task_id="wait_for_search_derived__mobile_search_aggregates__v1", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=77400)).isoformat() }}", + ) + + search_derived__mobile_search_aggregates__v1_external.set_upstream( + search_derived__mobile_search_aggregates__v1 + ) + search_derived__mobile_search_clients_daily__v1 = bigquery_etl_query( task_id="search_derived__mobile_search_clients_daily__v1", destination_table="mobile_search_clients_daily_v1", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_search_dashboard.py /tmp/workspace/generated-sql/dags/bqetl_search_dashboard.py --- /tmp/workspace/main-generated-sql/dags/bqetl_search_dashboard.py 2024-08-21 15:00:56.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_search_dashboard.py 2024-08-21 15:02:32.000000000 +0000 @@ -275,6 +275,18 @@ pool="DATA_ENG_EXTERNALTASKSENSOR", ) + wait_for_search_derived__mobile_search_aggregates__v1 = ExternalTaskSensor( + task_id="wait_for_search_derived__mobile_search_aggregates__v1", + external_dag_id="bqetl_mobile_search", + external_task_id="search_derived__mobile_search_aggregates__v1", + execution_delta=datetime.timedelta(seconds=9000), + check_existence=True, + mode="reschedule", + 
allowed_states=ALLOWED_STATES, + failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + search_derived__desktop_search_aggregates_by_userstate__v1 = bigquery_etl_query( task_id="search_derived__desktop_search_aggregates_by_userstate__v1", destination_table="desktop_search_aggregates_by_userstate_v1", @@ -482,13 +494,17 @@ ) search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_copy_deduplicate_all + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 ) search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_search_derived__mobile_search_clients_daily__v1 + wait_for_search_derived__mobile_search_aggregates__v1 ) search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_search_derived__search_clients_daily__v8 + wait_for_search_derived__search_aggregates__v8 + ) + + search_derived__search_revenue_levers_daily__v1.set_upstream( + search_derived__search_dau_aggregates__v1 ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql 2024-08-21 14:56:03.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql 2024-08-21 14:56:03.000000000 +0000 @@ -1,276 +1,75 @@ -WITH --- Google Desktop (search + DAU) -desktop_data_google AS ( +WITH combined_search_data AS ( SELECT submission_date, - IF(LOWER(channel) LIKE '%esr%', 'ESR', 'personal') AS channel, country, - COUNT(DISTINCT client_id) AS dau, - COUNT( - DISTINCT IF(default_search_engine LIKE '%google%', client_id, NULL) - ) AS dau_w_engine_as_default, - COUNT( - DISTINCT IF( - sap > 0 - AND normalized_engine = 'Google' - AND 
default_search_engine LIKE '%google%', - client_id, - NULL - ) - ) AS dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Google', sap, 0)) AS sap, - SUM(IF(normalized_engine = 'Google', tagged_sap, 0)) AS tagged_sap, - SUM(IF(normalized_engine = 'Google', tagged_follow_on, 0)) AS tagged_follow_on, - SUM(IF(normalized_engine = 'Google', search_with_ads, 0)) AS search_with_ads, - SUM(IF(normalized_engine = 'Google', ad_click, 0)) AS ad_click, - SUM(IF(normalized_engine = 'Google', organic, 0)) AS organic, - SUM(IF(normalized_engine = 'Google', ad_click_organic, 0)) AS ad_click_organic, - SUM(IF(normalized_engine = 'Google', search_with_ads_organic, 0)) AS search_with_ads_organic, - SUM(IF(normalized_engine = 'Google' AND is_sap_monetizable, sap, 0)) AS monetizable_sap + normalized_engine AS partner, + 'desktop' AS device, + sap, + tagged_sap, + tagged_follow_on, + search_with_ads, + ad_click, + organic, + ad_click_organic, + search_with_ads_organic, + IF(is_sap_monetizable, sap, 0) AS monetizable_sap FROM - `moz-fx-data-shared-prod.search.search_clients_engines_sources_daily` + `moz-fx-data-shared-prod.search.search_aggregates` WHERE submission_date = @submission_date - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ) - GROUP BY - submission_date, - channel, - country - ORDER BY - submission_date, - channel, - country -), --- Bing Desktop (non-Acer) -desktop_data_bing AS ( + UNION ALL SELECT submission_date, country, - COUNT(DISTINCT client_id) AS dau, - COUNT( - DISTINCT IF(default_search_engine LIKE '%bing%', client_id, NULL) - ) AS dau_w_engine_as_default, - COUNT( - DISTINCT IF( - sap > 0 - AND normalized_engine = 'Bing' - AND default_search_engine LIKE '%bing%', - client_id, - NULL - ) - ) AS dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Bing', sap, 0)) AS sap, - SUM(IF(normalized_engine = 'Bing', tagged_sap, 0)) AS tagged_sap, 
- SUM(IF(normalized_engine = 'Bing', tagged_follow_on, 0)) AS tagged_follow_on, - SUM(IF(normalized_engine = 'Bing', search_with_ads, 0)) AS search_with_ads, - SUM(IF(normalized_engine = 'Bing', ad_click, 0)) AS ad_click, - SUM(IF(normalized_engine = 'Bing', organic, 0)) AS organic, - SUM(IF(normalized_engine = 'Bing', ad_click_organic, 0)) AS ad_click_organic, - SUM(IF(normalized_engine = 'Bing', search_with_ads_organic, 0)) AS search_with_ads_organic, - SUM(IF(normalized_engine = 'Bing' AND is_sap_monetizable, sap, 0)) AS monetizable_sap + normalized_engine AS partner, + 'mobile' AS device, + sap, + tagged_sap, + tagged_follow_on, + search_with_ads, + ad_click, + organic, + ad_click_organic, + 0 AS search_with_ads_organic, + 0 AS monetizable_sap FROM - `moz-fx-data-shared-prod.search.search_clients_engines_sources_daily` + `moz-fx-data-shared-prod.search.mobile_search_aggregates` WHERE submission_date = @submission_date - AND (distribution_id IS NULL OR distribution_id NOT LIKE '%acer%') - AND client_id NOT IN (SELECT client_id FROM `moz-fx-data-shared-prod.search.acer_cohort`) - GROUP BY - submission_date, - country - ORDER BY - submission_date, - country ), --- DDG Desktop + Extension -desktop_data_ddg AS ( - SELECT +eligible_markets_dau AS ( + SELECT DISTINCT + "desktop" AS device, submission_date, country, - COUNT(DISTINCT client_id) AS dau, + COUNT(DISTINCT client_id) AS global_eligible_dau, COUNT( DISTINCT IF( ( - (default_search_engine LIKE('%ddg%') OR default_search_engine LIKE('%duckduckgo%')) - AND NOT default_search_engine LIKE('%addon%') - ), - client_id, - NULL - ) - ) AS ddg_dau_w_engine_as_default, - COUNT( - DISTINCT IF( - (engine) IN ('ddg', 'duckduckgo') - AND sap > 0 - AND ( - (default_search_engine LIKE('%ddg%') OR default_search_engine LIKE('%duckduckgo%')) - AND NOT default_search_engine LIKE('%addon%') + (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) + OR (submission_date >= "2023-12-01" AND country 
NOT IN ('RU', 'UA', 'BY', 'CN')) ), client_id, NULL ) - ) AS ddg_dau_engaged_w_sap, - SUM(IF(engine IN ('ddg', 'duckduckgo'), sap, 0)) AS ddg_sap, - SUM(IF(engine IN ('ddg', 'duckduckgo'), tagged_sap, 0)) AS ddg_tagged_sap, - SUM(IF(engine IN ('ddg', 'duckduckgo'), tagged_follow_on, 0)) AS ddg_tagged_follow_on, - SUM(IF(engine IN ('ddg', 'duckduckgo'), search_with_ads, 0)) AS ddg_search_with_ads, - SUM(IF(engine IN ('ddg', 'duckduckgo'), ad_click, 0)) AS ddg_ad_click, - SUM(IF(engine IN ('ddg', 'duckduckgo'), organic, 0)) AS ddg_organic, - SUM(IF(engine IN ('ddg', 'duckduckgo'), ad_click_organic, 0)) AS ddg_ad_click_organic, - SUM( - IF(engine IN ('ddg', 'duckduckgo'), search_with_ads_organic, 0) - ) AS ddg_search_with_ads_organic, - SUM(IF(engine IN ('ddg', 'duckduckgo') AND is_sap_monetizable, sap, 0)) AS ddg_monetizable_sap, - -- in-content probes not available for addon so these metrics although being here will be zero - COUNT( - DISTINCT IF(default_search_engine LIKE('ddg%addon'), client_id, NULL) - ) AS ddgaddon_dau_w_engine_as_default, - COUNT( - DISTINCT IF( - engine = 'ddg-addon' - AND sap > 0 - AND default_search_engine LIKE('ddg%addon'), - client_id, - NULL - ) - ) AS ddgaddon_dau_engaged_w_sap, - SUM(IF(engine IN ('ddg-addon'), sap, 0)) AS ddgaddon_sap, - SUM(IF(engine IN ('ddg-addon'), tagged_sap, 0)) AS ddgaddon_tagged_sap, - SUM(IF(engine IN ('ddg-addon'), tagged_follow_on, 0)) AS ddgaddon_tagged_follow_on, - SUM(IF(engine IN ('ddg-addon'), search_with_ads, 0)) AS ddgaddon_search_with_ads, - SUM(IF(engine IN ('ddg-addon'), ad_click, 0)) AS ddgaddon_ad_click, - SUM(IF(engine IN ('ddg-addon'), organic, 0)) AS ddgaddon_organic, - SUM(IF(engine IN ('ddg-addon'), ad_click_organic, 0)) AS ddgaddon_ad_click_organic, - SUM( - IF(engine IN ('ddg-addon'), search_with_ads_organic, 0) - ) AS ddgaddon_search_with_ads_organic, - SUM(IF(engine IN ('ddg-addon') AND is_sap_monetizable, sap, 0)) AS ddgaddon_monetizable_sap + ) AS google_eligible_dau FROM - 
`moz-fx-data-shared-prod.search.search_clients_engines_sources_daily` + `mozdata.telemetry.desktop_active_users` WHERE submission_date = @submission_date + AND is_dau + # not including Mozilla Online + AND app_name = "Firefox Desktop" GROUP BY - submission_date, - country - ORDER BY - submission_date, - country -), --- Mobile DAU data -- merging baseline clients to AUA clients -## baseline ping -- mobile default search engine by client id -mobile_baseline_engine AS ( - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine_code AS default_search_engine, - FROM - `moz-fx-data-shared-prod.fenix.baseline` - WHERE - DATE(submission_timestamp) = @submission_date + 1, + 2, + 3 UNION ALL SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine - FROM - `moz-fx-data-shared-prod.firefox_ios.baseline` - WHERE - DATE(submission_timestamp) = @submission_date - UNION ALL - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.browser_default_search_engine AS default_search_engine - FROM - `moz-fx-data-shared-prod.focus_android.baseline` - WHERE - DATE(submission_timestamp) = @submission_date - UNION ALL - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine - FROM - `moz-fx-data-shared-prod.focus_ios.baseline` - WHERE - DATE(submission_timestamp) = @submission_date -), -## baseline ping search counts -- mobile search counts by client id -mobile_baseline_search AS ( - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine_code AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - 
`moz-fx-data-shared-prod.fenix.baseline`, - UNNEST(metrics.labeled_counter.metrics_search_count) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 - UNION ALL - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - `moz-fx-data-shared-prod.firefox_ios.baseline`, - UNNEST(metrics.labeled_counter.search_counts) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 - UNION ALL - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.browser_default_search_engine AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - `moz-fx-data-shared-prod.focus_android.baseline`, - UNNEST(metrics.labeled_counter.browser_search_search_count) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 - UNION ALL - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - `moz-fx-data-shared-prod.focus_ios.baseline`, - UNNEST(metrics.labeled_counter.browser_search_search_count) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 -), -## baseline-powered clients who qualify for KPI (activity filters applied) -mobile_dau_data AS ( - SELECT DISTINCT - submission_date, "mobile" AS device, - country, - client_id - FROM - `mozdata.telemetry.mobile_active_users` - WHERE - submission_date = @submission_date - AND is_dau - 
# not including Fenix MozillaOnline, BrowserStack, Klar - AND app_name IN ("Focus iOS", "Firefox iOS", "Fenix", "Focus Android") -), -final_mobile_dau_counts AS ( - SELECT submission_date, country, - COUNT(DISTINCT client_id) AS eligible_dau, + COUNT(DISTINCT client_id) AS global_eligible_dau, COUNT( DISTINCT IF( ( @@ -280,316 +79,79 @@ client_id, NULL ) - ) AS google_eligible_dau, - COUNT( - DISTINCT IF( - default_search_engine LIKE '%google%' - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ), - client_id, - NULL - ) - ) AS google_dau_w_engine_as_default, - COUNT( - DISTINCT IF( - search_count > 0 - AND normalized_engine = 'Google' - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ), - client_id, - NULL - ) - ) AS google_dau_engaged_w_sap, - COUNT( - DISTINCT IF(default_search_engine LIKE '%bing%', client_id, NULL) - ) AS bing_dau_w_engine_as_default, - COUNT( - DISTINCT IF(search_count > 0 AND normalized_engine = 'Bing', client_id, NULL) - ) AS bing_dau_engaged_w_sap, - COUNT( - DISTINCT IF( - default_search_engine LIKE('%ddg%') - OR default_search_engine LIKE('%duckduckgo%'), - client_id, - NULL - ) - ) AS ddg_dau_w_engine_as_default, - COUNT( - DISTINCT IF(normalized_engine = "DuckDuckGo" AND search_count > 0, client_id, NULL) - ) AS ddg_dau_engaged_w_sap + ) AS google_eligible_dau FROM - mobile_dau_data - LEFT JOIN - mobile_baseline_engine - USING (submission_date, client_id) - LEFT JOIN - mobile_baseline_search - USING (submission_date, client_id, default_search_engine) + `mozdata.telemetry.mobile_active_users` + WHERE + submission_date = @submission_date + AND is_dau + # not including Fenix MozillaOnline, BrowserStack, Klar + AND app_name IN ("Focus iOS", "Firefox iOS", "Fenix", "Focus 
Android") GROUP BY - submission_date, - country + 1, + 2, + 3 ), --- Google Mobile (search only - as mobile search metrics is based on metrics --- ping, while DAU should be based on main ping on Mobile, see also --- https://mozilla-hub.atlassian.net/browse/RS-575) -mobile_data_google AS ( +desktop_mobile_search_dau AS ( SELECT submission_date, + partner, + device, country, - google_eligible_dau, - google_dau_w_engine_as_default, - google_dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Google', sap, 0)) AS sap, - SUM(IF(normalized_engine = 'Google', tagged_sap, 0)) AS tagged_sap, - SUM(IF(normalized_engine = 'Google', tagged_follow_on, 0)) AS tagged_follow_on, - SUM(IF(normalized_engine = 'Google', search_with_ads, 0)) AS search_with_ads, - SUM(IF(normalized_engine = 'Google', ad_click, 0)) AS ad_click, - SUM(IF(normalized_engine = 'Google', organic, 0)) AS organic, - SUM(IF(normalized_engine = 'Google', ad_click_organic, 0)) AS ad_click_organic, - -- metrics do not exist for mobile - 0 AS search_with_ads_organic, - 0 AS monetizable_sap + SUM(dau_w_engine_as_default) AS dau_w_engine_as_default, + SUM(dau_engaged_w_sap) AS dau_engaged_w_sap FROM - `moz-fx-data-shared-prod.search.mobile_search_clients_engines_sources_daily` - INNER JOIN - final_mobile_dau_counts - USING (submission_date, country) + `mozdata.search.search_dau_aggregates` WHERE submission_date = @submission_date - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ) - AND ( - app_name IN ('Fenix', 'Firefox Preview', 'Focus', 'Focus Android Glean', 'Focus iOS Glean') - OR (app_name = 'Fennec' AND os = 'iOS') - ) GROUP BY - submission_date, - country, - google_eligible_dau, - google_dau_w_engine_as_default, - google_dau_engaged_w_sap - ORDER BY - submission_date, - country + 1, + 2, + 3, + 4 ), --- Bing & DDG Mobile (search only - as mobile search metrics is based on --- 
metrics ping, while DAU should be based on main ping on Mobile, see also --- https://mozilla-hub.atlassian.net/browse/RS-575) -mobile_data_bing_ddg AS ( +combined_search_dau AS ( SELECT submission_date, + partner, + device, country, - eligible_dau, - bing_dau_w_engine_as_default, - bing_dau_engaged_w_sap, - ddg_dau_w_engine_as_default, - ddg_dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Bing', sap, 0)) AS bing_sap, - SUM(IF(normalized_engine = 'Bing', tagged_sap, 0)) AS bing_tagged_sap, - SUM(IF(normalized_engine = 'Bing', tagged_follow_on, 0)) AS bing_tagged_follow_on, - SUM(IF(normalized_engine = 'Bing', search_with_ads, 0)) AS bing_search_with_ads, - SUM(IF(normalized_engine = 'Bing', ad_click, 0)) AS bing_ad_click, - SUM(IF(normalized_engine = 'Bing', organic, 0)) AS bing_organic, - SUM(IF(normalized_engine = 'Bing', ad_click_organic, 0)) AS bing_ad_click_organic, - -- metrics do not exist for mobile - 0 AS bing_search_with_ads_organic, - 0 AS bing_monetizable_sap, - SUM(IF(normalized_engine = 'DuckDuckGo', sap, 0)) AS ddg_sap, - SUM(IF(normalized_engine = 'DuckDuckGo', tagged_sap, 0)) AS ddg_tagged_sap, - SUM(IF(normalized_engine = 'DuckDuckGo', tagged_follow_on, 0)) AS ddg_tagged_follow_on, - SUM(IF(normalized_engine = 'DuckDuckGo', search_with_ads, 0)) AS ddg_search_with_ads, - SUM(IF(normalized_engine = 'DuckDuckGo', ad_click, 0)) AS ddg_ad_click, - SUM(IF(normalized_engine = 'DuckDuckGo', organic, 0)) AS ddg_organic, - SUM(IF(normalized_engine = 'DuckDuckGo', ad_click_organic, 0)) AS ddg_ad_click_organic, - -- metrics do not exist for mobile - 0 AS ddg_search_with_ads_organic, - 0 AS ddg_monetizable_sap + CASE + WHEN partner = "Google" + THEN google_eligible_dau + ELSE global_eligible_dau + END AS dau_eligible_markets, + dau_w_engine_as_default, + dau_engaged_w_sap FROM - `moz-fx-data-shared-prod.search.mobile_search_clients_engines_sources_daily` - INNER JOIN - final_mobile_dau_counts - USING (submission_date, country) - WHERE - submission_date = 
@submission_date - AND ( - app_name IN ('Fenix', 'Firefox Preview', 'Focus', 'Focus Android Glean', 'Focus iOS Glean') - OR (app_name = 'Fennec' AND os = 'iOS') - ) - GROUP BY - submission_date, - country, - eligible_dau, - bing_dau_w_engine_as_default, - bing_dau_engaged_w_sap, - ddg_dau_w_engine_as_default, - ddg_dau_engaged_w_sap - ORDER BY - submission_date, - country + desktop_mobile_search_dau + LEFT JOIN + eligible_markets_dau + USING (submission_date, device, country) ) --- combine all desktop and mobile together -SELECT - submission_date, - 'Google' AS partner, - 'desktop' AS device, - channel, - country, - dau, - dau_engaged_w_sap, - sap, - tagged_sap, - tagged_follow_on, - search_with_ads, - ad_click, - organic, - ad_click_organic, - search_with_ads_organic, - monetizable_sap, - dau_w_engine_as_default -FROM - desktop_data_google -UNION ALL SELECT - submission_date, - 'Bing' AS partner, - 'desktop' AS device, - NULL AS channel, - country, - dau, - dau_engaged_w_sap, - sap, - tagged_sap, - tagged_follow_on, - search_with_ads, - ad_click, - organic, - ad_click_organic, - search_with_ads_organic, - monetizable_sap, - dau_w_engine_as_default -FROM - desktop_data_bing -UNION ALL -SELECT - submission_date, - 'DuckDuckGo' AS partner, - 'desktop' AS device, - NULL AS channel, - country, - dau, - ddg_dau_engaged_w_sap AS dau_engaged_w_sap, - ddg_sap AS sap, - ddg_tagged_sap AS tagged_sap, - ddg_tagged_follow_on AS tagged_follow_on, - ddg_search_with_ads AS search_with_ads, - ddg_ad_click AS ad_click, - ddg_organic AS organic, - ddg_ad_click_organic AS ad_click_organic, - ddg_search_with_ads_organic AS search_with_ads_organic, - ddg_monetizable_sap AS monetizable_sap, - ddg_dau_w_engine_as_default AS dau_w_engine_as_default -FROM - desktop_data_ddg -UNION ALL -SELECT - submission_date, - 'DuckDuckGo' AS partner, - 'extension' AS device, - NULL AS channel, - country, - dau, - ddgaddon_dau_engaged_w_sap AS dau_engaged_w_sap, - ddgaddon_sap AS sap, - 
ddgaddon_tagged_sap AS tagged_sap, - ddgaddon_tagged_follow_on AS tagged_follow_on, - ddgaddon_search_with_ads AS search_with_ads, - ddgaddon_ad_click AS ad_click, - ddgaddon_organic AS organic, - ddgaddon_ad_click_organic AS ad_click_organic, - ddgaddon_search_with_ads_organic AS search_with_ads_organic, - ddgaddon_monetizable_sap AS monetizable_sap, - ddgaddon_dau_w_engine_as_default AS dau_w_engine_as_default -FROM - desktop_data_ddg -UNION ALL -SELECT - submission_date, - 'Google' AS partner, - 'mobile' AS device, - 'n/a' AS channel, - country, - google_eligible_dau AS dau, - google_dau_engaged_w_sap AS dau_engaged_w_sap, - sap, - tagged_sap, - tagged_follow_on, - search_with_ads, - ad_click, - organic, - ad_click_organic, - search_with_ads_organic, - monetizable_sap, - # custom engine bug merged in v121 - # null engine bug merged in v126 - # remove default engine data prior to June 2024 - IF( - submission_date >= "2024-06-01", - google_dau_w_engine_as_default, - NULL - ) AS dau_w_engine_as_default -FROM - mobile_data_google -UNION ALL -SELECT - submission_date, - 'Bing' AS partner, - 'mobile' AS device, - NULL AS channel, - country, - eligible_dau, - bing_dau_engaged_w_sap, - bing_sap, - bing_tagged_sap, - bing_tagged_follow_on, - bing_search_with_ads, - bing_ad_click, - bing_organic, - bing_ad_click_organic, - bing_search_with_ads_organic, - bing_monetizable_sap, - # custom engine bug merged in v121 - # null engine bug merged in v126 - # remove default engine data prior to June 2024 - IF(submission_date >= "2024-06-01", bing_dau_w_engine_as_default, NULL) AS dau_w_engine_as_default -FROM - mobile_data_bing_ddg -UNION ALL -SELECT - submission_date, - 'DuckDuckGo' AS partner, - 'mobile' AS device, - NULL AS channel, - country, - eligible_dau, - ddg_dau_engaged_w_sap, - ddg_sap, - ddg_tagged_sap, - ddg_tagged_follow_on, - ddg_search_with_ads, - ddg_ad_click, - ddg_organic, - ddg_ad_click_organic, - ddg_search_with_ads_organic, - ddg_monetizable_sap, - # custom 
engine bug merged in v121 - # null engine bug merged in v126 - # remove default engine data prior to June 2024 - IF(submission_date >= "2024-06-01", ddg_dau_w_engine_as_default, NULL) AS dau_w_engine_as_default -FROM - mobile_data_bing_ddg + cd.submission_date, + cd.partner, + cd.device, + CAST(NULL AS STRING) AS channel, + cd.country, + du.dau_eligible_markets AS dau, + du.dau_w_engine_as_default, + du.dau_engaged_w_sap, + cd.sap, + cd.tagged_sap, + cd.tagged_follow_on, + cd.search_with_ads, + cd.ad_click, + cd.organic, + cd.ad_click_organic, + cd.search_with_ads_organic, + cd.monetizable_sap +FROM + combined_search_data cd +LEFT JOIN + combined_search_dau du + ON cd.partner = du.partner + AND cd.submission_date = du.submission_date + AND cd.country = du.country + AND cd.device = du.device; ```

Link to full diff

dataops-ci-bot commented 1 month ago

Integration report for "Merge branch 'main' into search_levers_daily_from_aggregates"

sql.diff

Click to expand! ```diff diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_mobile_search.py /tmp/workspace/generated-sql/dags/bqetl_mobile_search.py --- /tmp/workspace/main-generated-sql/dags/bqetl_mobile_search.py 2024-08-23 16:51:22.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_mobile_search.py 2024-08-23 16:52:25.000000000 +0000 @@ -80,6 +80,20 @@ depends_on_past=False, ) + with TaskGroup( + "search_derived__mobile_search_aggregates__v1_external", + ) as search_derived__mobile_search_aggregates__v1_external: + ExternalTaskMarker( + task_id="bqetl_search_dashboard__wait_for_search_derived__mobile_search_aggregates__v1", + external_dag_id="bqetl_search_dashboard", + external_task_id="wait_for_search_derived__mobile_search_aggregates__v1", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=77400)).isoformat() }}", + ) + + search_derived__mobile_search_aggregates__v1_external.set_upstream( + search_derived__mobile_search_aggregates__v1 + ) + search_derived__mobile_search_clients_daily__v1 = bigquery_etl_query( task_id="search_derived__mobile_search_clients_daily__v1", destination_table="mobile_search_clients_daily_v1", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_search_dashboard.py /tmp/workspace/generated-sql/dags/bqetl_search_dashboard.py --- /tmp/workspace/main-generated-sql/dags/bqetl_search_dashboard.py 2024-08-23 16:51:22.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_search_dashboard.py 2024-08-23 16:52:26.000000000 +0000 @@ -275,6 +275,18 @@ pool="DATA_ENG_EXTERNALTASKSENSOR", ) + wait_for_search_derived__mobile_search_aggregates__v1 = ExternalTaskSensor( + task_id="wait_for_search_derived__mobile_search_aggregates__v1", + external_dag_id="bqetl_mobile_search", + external_task_id="search_derived__mobile_search_aggregates__v1", + execution_delta=datetime.timedelta(seconds=9000), + check_existence=True, + mode="reschedule", + 
allowed_states=ALLOWED_STATES, + failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + search_derived__desktop_search_aggregates_by_userstate__v1 = bigquery_etl_query( task_id="search_derived__desktop_search_aggregates_by_userstate__v1", destination_table="desktop_search_aggregates_by_userstate_v1", @@ -482,13 +494,17 @@ ) search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_copy_deduplicate_all + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 ) search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_search_derived__mobile_search_clients_daily__v1 + wait_for_search_derived__mobile_search_aggregates__v1 ) search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_search_derived__search_clients_daily__v8 + wait_for_search_derived__search_aggregates__v8 + ) + + search_derived__search_revenue_levers_daily__v1.set_upstream( + search_derived__search_dau_aggregates__v1 ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql 2024-08-23 16:46:04.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql 2024-08-23 16:46:06.000000000 +0000 @@ -1,276 +1,75 @@ -WITH --- Google Desktop (search + DAU) -desktop_data_google AS ( +WITH combined_search_data AS ( SELECT submission_date, - IF(LOWER(channel) LIKE '%esr%', 'ESR', 'personal') AS channel, country, - COUNT(DISTINCT client_id) AS dau, - COUNT( - DISTINCT IF(default_search_engine LIKE '%google%', client_id, NULL) - ) AS dau_w_engine_as_default, - COUNT( - DISTINCT IF( - sap > 0 - AND normalized_engine = 'Google' - AND 
default_search_engine LIKE '%google%', - client_id, - NULL - ) - ) AS dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Google', sap, 0)) AS sap, - SUM(IF(normalized_engine = 'Google', tagged_sap, 0)) AS tagged_sap, - SUM(IF(normalized_engine = 'Google', tagged_follow_on, 0)) AS tagged_follow_on, - SUM(IF(normalized_engine = 'Google', search_with_ads, 0)) AS search_with_ads, - SUM(IF(normalized_engine = 'Google', ad_click, 0)) AS ad_click, - SUM(IF(normalized_engine = 'Google', organic, 0)) AS organic, - SUM(IF(normalized_engine = 'Google', ad_click_organic, 0)) AS ad_click_organic, - SUM(IF(normalized_engine = 'Google', search_with_ads_organic, 0)) AS search_with_ads_organic, - SUM(IF(normalized_engine = 'Google' AND is_sap_monetizable, sap, 0)) AS monetizable_sap + normalized_engine AS partner, + 'desktop' AS device, + sap, + tagged_sap, + tagged_follow_on, + search_with_ads, + ad_click, + organic, + ad_click_organic, + search_with_ads_organic, + IF(is_sap_monetizable, sap, 0) AS monetizable_sap FROM - `moz-fx-data-shared-prod.search.search_clients_engines_sources_daily` + `moz-fx-data-shared-prod.search.search_aggregates` WHERE submission_date = @submission_date - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ) - GROUP BY - submission_date, - channel, - country - ORDER BY - submission_date, - channel, - country -), --- Bing Desktop (non-Acer) -desktop_data_bing AS ( + UNION ALL SELECT submission_date, country, - COUNT(DISTINCT client_id) AS dau, - COUNT( - DISTINCT IF(default_search_engine LIKE '%bing%', client_id, NULL) - ) AS dau_w_engine_as_default, - COUNT( - DISTINCT IF( - sap > 0 - AND normalized_engine = 'Bing' - AND default_search_engine LIKE '%bing%', - client_id, - NULL - ) - ) AS dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Bing', sap, 0)) AS sap, - SUM(IF(normalized_engine = 'Bing', tagged_sap, 0)) AS tagged_sap, 
- SUM(IF(normalized_engine = 'Bing', tagged_follow_on, 0)) AS tagged_follow_on, - SUM(IF(normalized_engine = 'Bing', search_with_ads, 0)) AS search_with_ads, - SUM(IF(normalized_engine = 'Bing', ad_click, 0)) AS ad_click, - SUM(IF(normalized_engine = 'Bing', organic, 0)) AS organic, - SUM(IF(normalized_engine = 'Bing', ad_click_organic, 0)) AS ad_click_organic, - SUM(IF(normalized_engine = 'Bing', search_with_ads_organic, 0)) AS search_with_ads_organic, - SUM(IF(normalized_engine = 'Bing' AND is_sap_monetizable, sap, 0)) AS monetizable_sap + normalized_engine AS partner, + 'mobile' AS device, + sap, + tagged_sap, + tagged_follow_on, + search_with_ads, + ad_click, + organic, + ad_click_organic, + 0 AS search_with_ads_organic, + 0 AS monetizable_sap FROM - `moz-fx-data-shared-prod.search.search_clients_engines_sources_daily` + `moz-fx-data-shared-prod.search.mobile_search_aggregates` WHERE submission_date = @submission_date - AND (distribution_id IS NULL OR distribution_id NOT LIKE '%acer%') - AND client_id NOT IN (SELECT client_id FROM `moz-fx-data-shared-prod.search.acer_cohort`) - GROUP BY - submission_date, - country - ORDER BY - submission_date, - country ), --- DDG Desktop + Extension -desktop_data_ddg AS ( - SELECT +eligible_markets_dau AS ( + SELECT DISTINCT + "desktop" AS device, submission_date, country, - COUNT(DISTINCT client_id) AS dau, + COUNT(DISTINCT client_id) AS global_eligible_dau, COUNT( DISTINCT IF( ( - (default_search_engine LIKE('%ddg%') OR default_search_engine LIKE('%duckduckgo%')) - AND NOT default_search_engine LIKE('%addon%') - ), - client_id, - NULL - ) - ) AS ddg_dau_w_engine_as_default, - COUNT( - DISTINCT IF( - (engine) IN ('ddg', 'duckduckgo') - AND sap > 0 - AND ( - (default_search_engine LIKE('%ddg%') OR default_search_engine LIKE('%duckduckgo%')) - AND NOT default_search_engine LIKE('%addon%') + (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) + OR (submission_date >= "2023-12-01" AND country 
NOT IN ('RU', 'UA', 'BY', 'CN')) ), client_id, NULL ) - ) AS ddg_dau_engaged_w_sap, - SUM(IF(engine IN ('ddg', 'duckduckgo'), sap, 0)) AS ddg_sap, - SUM(IF(engine IN ('ddg', 'duckduckgo'), tagged_sap, 0)) AS ddg_tagged_sap, - SUM(IF(engine IN ('ddg', 'duckduckgo'), tagged_follow_on, 0)) AS ddg_tagged_follow_on, - SUM(IF(engine IN ('ddg', 'duckduckgo'), search_with_ads, 0)) AS ddg_search_with_ads, - SUM(IF(engine IN ('ddg', 'duckduckgo'), ad_click, 0)) AS ddg_ad_click, - SUM(IF(engine IN ('ddg', 'duckduckgo'), organic, 0)) AS ddg_organic, - SUM(IF(engine IN ('ddg', 'duckduckgo'), ad_click_organic, 0)) AS ddg_ad_click_organic, - SUM( - IF(engine IN ('ddg', 'duckduckgo'), search_with_ads_organic, 0) - ) AS ddg_search_with_ads_organic, - SUM(IF(engine IN ('ddg', 'duckduckgo') AND is_sap_monetizable, sap, 0)) AS ddg_monetizable_sap, - -- in-content probes not available for addon so these metrics although being here will be zero - COUNT( - DISTINCT IF(default_search_engine LIKE('ddg%addon'), client_id, NULL) - ) AS ddgaddon_dau_w_engine_as_default, - COUNT( - DISTINCT IF( - engine = 'ddg-addon' - AND sap > 0 - AND default_search_engine LIKE('ddg%addon'), - client_id, - NULL - ) - ) AS ddgaddon_dau_engaged_w_sap, - SUM(IF(engine IN ('ddg-addon'), sap, 0)) AS ddgaddon_sap, - SUM(IF(engine IN ('ddg-addon'), tagged_sap, 0)) AS ddgaddon_tagged_sap, - SUM(IF(engine IN ('ddg-addon'), tagged_follow_on, 0)) AS ddgaddon_tagged_follow_on, - SUM(IF(engine IN ('ddg-addon'), search_with_ads, 0)) AS ddgaddon_search_with_ads, - SUM(IF(engine IN ('ddg-addon'), ad_click, 0)) AS ddgaddon_ad_click, - SUM(IF(engine IN ('ddg-addon'), organic, 0)) AS ddgaddon_organic, - SUM(IF(engine IN ('ddg-addon'), ad_click_organic, 0)) AS ddgaddon_ad_click_organic, - SUM( - IF(engine IN ('ddg-addon'), search_with_ads_organic, 0) - ) AS ddgaddon_search_with_ads_organic, - SUM(IF(engine IN ('ddg-addon') AND is_sap_monetizable, sap, 0)) AS ddgaddon_monetizable_sap + ) AS google_eligible_dau FROM - 
`moz-fx-data-shared-prod.search.search_clients_engines_sources_daily` + `mozdata.telemetry.desktop_active_users` WHERE submission_date = @submission_date + AND is_dau + # not including Mozilla Online + AND app_name = "Firefox Desktop" GROUP BY - submission_date, - country - ORDER BY - submission_date, - country -), --- Mobile DAU data -- merging baseline clients to AUA clients -## baseline ping -- mobile default search engine by client id -mobile_baseline_engine AS ( - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine_code AS default_search_engine, - FROM - `moz-fx-data-shared-prod.fenix.baseline` - WHERE - DATE(submission_timestamp) = @submission_date + 1, + 2, + 3 UNION ALL SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine - FROM - `moz-fx-data-shared-prod.firefox_ios.baseline` - WHERE - DATE(submission_timestamp) = @submission_date - UNION ALL - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.browser_default_search_engine AS default_search_engine - FROM - `moz-fx-data-shared-prod.focus_android.baseline` - WHERE - DATE(submission_timestamp) = @submission_date - UNION ALL - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine - FROM - `moz-fx-data-shared-prod.focus_ios.baseline` - WHERE - DATE(submission_timestamp) = @submission_date -), -## baseline ping search counts -- mobile search counts by client id -mobile_baseline_search AS ( - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine_code AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - 
`moz-fx-data-shared-prod.fenix.baseline`, - UNNEST(metrics.labeled_counter.metrics_search_count) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 - UNION ALL - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - `moz-fx-data-shared-prod.firefox_ios.baseline`, - UNNEST(metrics.labeled_counter.search_counts) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 - UNION ALL - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.browser_default_search_engine AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - `moz-fx-data-shared-prod.focus_android.baseline`, - UNNEST(metrics.labeled_counter.browser_search_search_count) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 - UNION ALL - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - `moz-fx-data-shared-prod.focus_ios.baseline`, - UNNEST(metrics.labeled_counter.browser_search_search_count) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 -), -## baseline-powered clients who qualify for KPI (activity filters applied) -mobile_dau_data AS ( - SELECT DISTINCT - submission_date, "mobile" AS device, - country, - client_id - FROM - `mozdata.telemetry.mobile_active_users` - WHERE - submission_date = @submission_date - AND is_dau - 
# not including Fenix MozillaOnline, BrowserStack, Klar - AND app_name IN ("Focus iOS", "Firefox iOS", "Fenix", "Focus Android") -), -final_mobile_dau_counts AS ( - SELECT submission_date, country, - COUNT(DISTINCT client_id) AS eligible_dau, + COUNT(DISTINCT client_id) AS global_eligible_dau, COUNT( DISTINCT IF( ( @@ -280,316 +79,79 @@ client_id, NULL ) - ) AS google_eligible_dau, - COUNT( - DISTINCT IF( - default_search_engine LIKE '%google%' - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ), - client_id, - NULL - ) - ) AS google_dau_w_engine_as_default, - COUNT( - DISTINCT IF( - search_count > 0 - AND normalized_engine = 'Google' - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ), - client_id, - NULL - ) - ) AS google_dau_engaged_w_sap, - COUNT( - DISTINCT IF(default_search_engine LIKE '%bing%', client_id, NULL) - ) AS bing_dau_w_engine_as_default, - COUNT( - DISTINCT IF(search_count > 0 AND normalized_engine = 'Bing', client_id, NULL) - ) AS bing_dau_engaged_w_sap, - COUNT( - DISTINCT IF( - default_search_engine LIKE('%ddg%') - OR default_search_engine LIKE('%duckduckgo%'), - client_id, - NULL - ) - ) AS ddg_dau_w_engine_as_default, - COUNT( - DISTINCT IF(normalized_engine = "DuckDuckGo" AND search_count > 0, client_id, NULL) - ) AS ddg_dau_engaged_w_sap + ) AS google_eligible_dau FROM - mobile_dau_data - LEFT JOIN - mobile_baseline_engine - USING (submission_date, client_id) - LEFT JOIN - mobile_baseline_search - USING (submission_date, client_id, default_search_engine) + `mozdata.telemetry.mobile_active_users` + WHERE + submission_date = @submission_date + AND is_dau + # not including Fenix MozillaOnline, BrowserStack, Klar + AND app_name IN ("Focus iOS", "Firefox iOS", "Fenix", "Focus 
Android") GROUP BY - submission_date, - country + 1, + 2, + 3 ), --- Google Mobile (search only - as mobile search metrics is based on metrics --- ping, while DAU should be based on main ping on Mobile, see also --- https://mozilla-hub.atlassian.net/browse/RS-575) -mobile_data_google AS ( +desktop_mobile_search_dau AS ( SELECT submission_date, + partner, + device, country, - google_eligible_dau, - google_dau_w_engine_as_default, - google_dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Google', sap, 0)) AS sap, - SUM(IF(normalized_engine = 'Google', tagged_sap, 0)) AS tagged_sap, - SUM(IF(normalized_engine = 'Google', tagged_follow_on, 0)) AS tagged_follow_on, - SUM(IF(normalized_engine = 'Google', search_with_ads, 0)) AS search_with_ads, - SUM(IF(normalized_engine = 'Google', ad_click, 0)) AS ad_click, - SUM(IF(normalized_engine = 'Google', organic, 0)) AS organic, - SUM(IF(normalized_engine = 'Google', ad_click_organic, 0)) AS ad_click_organic, - -- metrics do not exist for mobile - 0 AS search_with_ads_organic, - 0 AS monetizable_sap + SUM(dau_w_engine_as_default) AS dau_w_engine_as_default, + SUM(dau_engaged_w_sap) AS dau_engaged_w_sap FROM - `moz-fx-data-shared-prod.search.mobile_search_clients_engines_sources_daily` - INNER JOIN - final_mobile_dau_counts - USING (submission_date, country) + `mozdata.search.search_dau_aggregates` WHERE submission_date = @submission_date - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ) - AND ( - app_name IN ('Fenix', 'Firefox Preview', 'Focus', 'Focus Android Glean', 'Focus iOS Glean') - OR (app_name = 'Fennec' AND os = 'iOS') - ) GROUP BY - submission_date, - country, - google_eligible_dau, - google_dau_w_engine_as_default, - google_dau_engaged_w_sap - ORDER BY - submission_date, - country + 1, + 2, + 3, + 4 ), --- Bing & DDG Mobile (search only - as mobile search metrics is based on --- 
metrics ping, while DAU should be based on main ping on Mobile, see also --- https://mozilla-hub.atlassian.net/browse/RS-575) -mobile_data_bing_ddg AS ( +combined_search_dau AS ( SELECT submission_date, + partner, + device, country, - eligible_dau, - bing_dau_w_engine_as_default, - bing_dau_engaged_w_sap, - ddg_dau_w_engine_as_default, - ddg_dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Bing', sap, 0)) AS bing_sap, - SUM(IF(normalized_engine = 'Bing', tagged_sap, 0)) AS bing_tagged_sap, - SUM(IF(normalized_engine = 'Bing', tagged_follow_on, 0)) AS bing_tagged_follow_on, - SUM(IF(normalized_engine = 'Bing', search_with_ads, 0)) AS bing_search_with_ads, - SUM(IF(normalized_engine = 'Bing', ad_click, 0)) AS bing_ad_click, - SUM(IF(normalized_engine = 'Bing', organic, 0)) AS bing_organic, - SUM(IF(normalized_engine = 'Bing', ad_click_organic, 0)) AS bing_ad_click_organic, - -- metrics do not exist for mobile - 0 AS bing_search_with_ads_organic, - 0 AS bing_monetizable_sap, - SUM(IF(normalized_engine = 'DuckDuckGo', sap, 0)) AS ddg_sap, - SUM(IF(normalized_engine = 'DuckDuckGo', tagged_sap, 0)) AS ddg_tagged_sap, - SUM(IF(normalized_engine = 'DuckDuckGo', tagged_follow_on, 0)) AS ddg_tagged_follow_on, - SUM(IF(normalized_engine = 'DuckDuckGo', search_with_ads, 0)) AS ddg_search_with_ads, - SUM(IF(normalized_engine = 'DuckDuckGo', ad_click, 0)) AS ddg_ad_click, - SUM(IF(normalized_engine = 'DuckDuckGo', organic, 0)) AS ddg_organic, - SUM(IF(normalized_engine = 'DuckDuckGo', ad_click_organic, 0)) AS ddg_ad_click_organic, - -- metrics do not exist for mobile - 0 AS ddg_search_with_ads_organic, - 0 AS ddg_monetizable_sap + CASE + WHEN partner = "Google" + THEN google_eligible_dau + ELSE global_eligible_dau + END AS dau_eligible_markets, + dau_w_engine_as_default, + dau_engaged_w_sap FROM - `moz-fx-data-shared-prod.search.mobile_search_clients_engines_sources_daily` - INNER JOIN - final_mobile_dau_counts - USING (submission_date, country) - WHERE - submission_date = 
@submission_date - AND ( - app_name IN ('Fenix', 'Firefox Preview', 'Focus', 'Focus Android Glean', 'Focus iOS Glean') - OR (app_name = 'Fennec' AND os = 'iOS') - ) - GROUP BY - submission_date, - country, - eligible_dau, - bing_dau_w_engine_as_default, - bing_dau_engaged_w_sap, - ddg_dau_w_engine_as_default, - ddg_dau_engaged_w_sap - ORDER BY - submission_date, - country + desktop_mobile_search_dau + LEFT JOIN + eligible_markets_dau + USING (submission_date, device, country) ) --- combine all desktop and mobile together -SELECT - submission_date, - 'Google' AS partner, - 'desktop' AS device, - channel, - country, - dau, - dau_engaged_w_sap, - sap, - tagged_sap, - tagged_follow_on, - search_with_ads, - ad_click, - organic, - ad_click_organic, - search_with_ads_organic, - monetizable_sap, - dau_w_engine_as_default -FROM - desktop_data_google -UNION ALL SELECT - submission_date, - 'Bing' AS partner, - 'desktop' AS device, - NULL AS channel, - country, - dau, - dau_engaged_w_sap, - sap, - tagged_sap, - tagged_follow_on, - search_with_ads, - ad_click, - organic, - ad_click_organic, - search_with_ads_organic, - monetizable_sap, - dau_w_engine_as_default -FROM - desktop_data_bing -UNION ALL -SELECT - submission_date, - 'DuckDuckGo' AS partner, - 'desktop' AS device, - NULL AS channel, - country, - dau, - ddg_dau_engaged_w_sap AS dau_engaged_w_sap, - ddg_sap AS sap, - ddg_tagged_sap AS tagged_sap, - ddg_tagged_follow_on AS tagged_follow_on, - ddg_search_with_ads AS search_with_ads, - ddg_ad_click AS ad_click, - ddg_organic AS organic, - ddg_ad_click_organic AS ad_click_organic, - ddg_search_with_ads_organic AS search_with_ads_organic, - ddg_monetizable_sap AS monetizable_sap, - ddg_dau_w_engine_as_default AS dau_w_engine_as_default -FROM - desktop_data_ddg -UNION ALL -SELECT - submission_date, - 'DuckDuckGo' AS partner, - 'extension' AS device, - NULL AS channel, - country, - dau, - ddgaddon_dau_engaged_w_sap AS dau_engaged_w_sap, - ddgaddon_sap AS sap, - 
ddgaddon_tagged_sap AS tagged_sap, - ddgaddon_tagged_follow_on AS tagged_follow_on, - ddgaddon_search_with_ads AS search_with_ads, - ddgaddon_ad_click AS ad_click, - ddgaddon_organic AS organic, - ddgaddon_ad_click_organic AS ad_click_organic, - ddgaddon_search_with_ads_organic AS search_with_ads_organic, - ddgaddon_monetizable_sap AS monetizable_sap, - ddgaddon_dau_w_engine_as_default AS dau_w_engine_as_default -FROM - desktop_data_ddg -UNION ALL -SELECT - submission_date, - 'Google' AS partner, - 'mobile' AS device, - 'n/a' AS channel, - country, - google_eligible_dau AS dau, - google_dau_engaged_w_sap AS dau_engaged_w_sap, - sap, - tagged_sap, - tagged_follow_on, - search_with_ads, - ad_click, - organic, - ad_click_organic, - search_with_ads_organic, - monetizable_sap, - # custom engine bug merged in v121 - # null engine bug merged in v126 - # remove default engine data prior to June 2024 - IF( - submission_date >= "2024-06-01", - google_dau_w_engine_as_default, - NULL - ) AS dau_w_engine_as_default -FROM - mobile_data_google -UNION ALL -SELECT - submission_date, - 'Bing' AS partner, - 'mobile' AS device, - NULL AS channel, - country, - eligible_dau, - bing_dau_engaged_w_sap, - bing_sap, - bing_tagged_sap, - bing_tagged_follow_on, - bing_search_with_ads, - bing_ad_click, - bing_organic, - bing_ad_click_organic, - bing_search_with_ads_organic, - bing_monetizable_sap, - # custom engine bug merged in v121 - # null engine bug merged in v126 - # remove default engine data prior to June 2024 - IF(submission_date >= "2024-06-01", bing_dau_w_engine_as_default, NULL) AS dau_w_engine_as_default -FROM - mobile_data_bing_ddg -UNION ALL -SELECT - submission_date, - 'DuckDuckGo' AS partner, - 'mobile' AS device, - NULL AS channel, - country, - eligible_dau, - ddg_dau_engaged_w_sap, - ddg_sap, - ddg_tagged_sap, - ddg_tagged_follow_on, - ddg_search_with_ads, - ddg_ad_click, - ddg_organic, - ddg_ad_click_organic, - ddg_search_with_ads_organic, - ddg_monetizable_sap, - # custom 
engine bug merged in v121 - # null engine bug merged in v126 - # remove default engine data prior to June 2024 - IF(submission_date >= "2024-06-01", ddg_dau_w_engine_as_default, NULL) AS dau_w_engine_as_default -FROM - mobile_data_bing_ddg + cd.submission_date, + cd.partner, + cd.device, + CAST(NULL AS STRING) AS channel, + cd.country, + du.dau_eligible_markets AS dau, + du.dau_w_engine_as_default, + du.dau_engaged_w_sap, + cd.sap, + cd.tagged_sap, + cd.tagged_follow_on, + cd.search_with_ads, + cd.ad_click, + cd.organic, + cd.ad_click_organic, + cd.search_with_ads_organic, + cd.monetizable_sap +FROM + combined_search_data cd +LEFT JOIN + combined_search_dau du + ON cd.partner = du.partner + AND cd.submission_date = du.submission_date + AND cd.country = du.country + AND cd.device = du.device; ```

Link to full diff

dataops-ci-bot commented 1 month ago

Integration report for "Merge branch 'main' into search_levers_daily_from_aggregates"

sql.diff

Click to expand! ```diff diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_mobile_search.py /tmp/workspace/generated-sql/dags/bqetl_mobile_search.py --- /tmp/workspace/main-generated-sql/dags/bqetl_mobile_search.py 2024-08-23 19:18:26.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_mobile_search.py 2024-08-23 19:20:09.000000000 +0000 @@ -80,6 +80,20 @@ depends_on_past=False, ) + with TaskGroup( + "search_derived__mobile_search_aggregates__v1_external", + ) as search_derived__mobile_search_aggregates__v1_external: + ExternalTaskMarker( + task_id="bqetl_search_dashboard__wait_for_search_derived__mobile_search_aggregates__v1", + external_dag_id="bqetl_search_dashboard", + external_task_id="wait_for_search_derived__mobile_search_aggregates__v1", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=77400)).isoformat() }}", + ) + + search_derived__mobile_search_aggregates__v1_external.set_upstream( + search_derived__mobile_search_aggregates__v1 + ) + search_derived__mobile_search_clients_daily__v1 = bigquery_etl_query( task_id="search_derived__mobile_search_clients_daily__v1", destination_table="mobile_search_clients_daily_v1", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_search_dashboard.py /tmp/workspace/generated-sql/dags/bqetl_search_dashboard.py --- /tmp/workspace/main-generated-sql/dags/bqetl_search_dashboard.py 2024-08-23 19:18:26.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_search_dashboard.py 2024-08-23 19:20:09.000000000 +0000 @@ -275,6 +275,18 @@ pool="DATA_ENG_EXTERNALTASKSENSOR", ) + wait_for_search_derived__mobile_search_aggregates__v1 = ExternalTaskSensor( + task_id="wait_for_search_derived__mobile_search_aggregates__v1", + external_dag_id="bqetl_mobile_search", + external_task_id="search_derived__mobile_search_aggregates__v1", + execution_delta=datetime.timedelta(seconds=9000), + check_existence=True, + mode="reschedule", + 
allowed_states=ALLOWED_STATES, + failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + search_derived__desktop_search_aggregates_by_userstate__v1 = bigquery_etl_query( task_id="search_derived__desktop_search_aggregates_by_userstate__v1", destination_table="desktop_search_aggregates_by_userstate_v1", @@ -482,13 +494,17 @@ ) search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_copy_deduplicate_all + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 ) search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_search_derived__mobile_search_clients_daily__v1 + wait_for_search_derived__mobile_search_aggregates__v1 ) search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_search_derived__search_clients_daily__v8 + wait_for_search_derived__search_aggregates__v8 + ) + + search_derived__search_revenue_levers_daily__v1.set_upstream( + search_derived__search_dau_aggregates__v1 ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql 2024-08-23 19:14:04.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql 2024-08-23 19:14:06.000000000 +0000 @@ -1,276 +1,75 @@ -WITH --- Google Desktop (search + DAU) -desktop_data_google AS ( +WITH combined_search_data AS ( SELECT submission_date, - IF(LOWER(channel) LIKE '%esr%', 'ESR', 'personal') AS channel, country, - COUNT(DISTINCT client_id) AS dau, - COUNT( - DISTINCT IF(default_search_engine LIKE '%google%', client_id, NULL) - ) AS dau_w_engine_as_default, - COUNT( - DISTINCT IF( - sap > 0 - AND normalized_engine = 'Google' - AND 
default_search_engine LIKE '%google%', - client_id, - NULL - ) - ) AS dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Google', sap, 0)) AS sap, - SUM(IF(normalized_engine = 'Google', tagged_sap, 0)) AS tagged_sap, - SUM(IF(normalized_engine = 'Google', tagged_follow_on, 0)) AS tagged_follow_on, - SUM(IF(normalized_engine = 'Google', search_with_ads, 0)) AS search_with_ads, - SUM(IF(normalized_engine = 'Google', ad_click, 0)) AS ad_click, - SUM(IF(normalized_engine = 'Google', organic, 0)) AS organic, - SUM(IF(normalized_engine = 'Google', ad_click_organic, 0)) AS ad_click_organic, - SUM(IF(normalized_engine = 'Google', search_with_ads_organic, 0)) AS search_with_ads_organic, - SUM(IF(normalized_engine = 'Google' AND is_sap_monetizable, sap, 0)) AS monetizable_sap + normalized_engine AS partner, + 'desktop' AS device, + sap, + tagged_sap, + tagged_follow_on, + search_with_ads, + ad_click, + organic, + ad_click_organic, + search_with_ads_organic, + IF(is_sap_monetizable, sap, 0) AS monetizable_sap FROM - `moz-fx-data-shared-prod.search.search_clients_engines_sources_daily` + `moz-fx-data-shared-prod.search.search_aggregates` WHERE submission_date = @submission_date - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ) - GROUP BY - submission_date, - channel, - country - ORDER BY - submission_date, - channel, - country -), --- Bing Desktop (non-Acer) -desktop_data_bing AS ( + UNION ALL SELECT submission_date, country, - COUNT(DISTINCT client_id) AS dau, - COUNT( - DISTINCT IF(default_search_engine LIKE '%bing%', client_id, NULL) - ) AS dau_w_engine_as_default, - COUNT( - DISTINCT IF( - sap > 0 - AND normalized_engine = 'Bing' - AND default_search_engine LIKE '%bing%', - client_id, - NULL - ) - ) AS dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Bing', sap, 0)) AS sap, - SUM(IF(normalized_engine = 'Bing', tagged_sap, 0)) AS tagged_sap, 
- SUM(IF(normalized_engine = 'Bing', tagged_follow_on, 0)) AS tagged_follow_on, - SUM(IF(normalized_engine = 'Bing', search_with_ads, 0)) AS search_with_ads, - SUM(IF(normalized_engine = 'Bing', ad_click, 0)) AS ad_click, - SUM(IF(normalized_engine = 'Bing', organic, 0)) AS organic, - SUM(IF(normalized_engine = 'Bing', ad_click_organic, 0)) AS ad_click_organic, - SUM(IF(normalized_engine = 'Bing', search_with_ads_organic, 0)) AS search_with_ads_organic, - SUM(IF(normalized_engine = 'Bing' AND is_sap_monetizable, sap, 0)) AS monetizable_sap + normalized_engine AS partner, + 'mobile' AS device, + sap, + tagged_sap, + tagged_follow_on, + search_with_ads, + ad_click, + organic, + ad_click_organic, + 0 AS search_with_ads_organic, + 0 AS monetizable_sap FROM - `moz-fx-data-shared-prod.search.search_clients_engines_sources_daily` + `moz-fx-data-shared-prod.search.mobile_search_aggregates` WHERE submission_date = @submission_date - AND (distribution_id IS NULL OR distribution_id NOT LIKE '%acer%') - AND client_id NOT IN (SELECT client_id FROM `moz-fx-data-shared-prod.search.acer_cohort`) - GROUP BY - submission_date, - country - ORDER BY - submission_date, - country ), --- DDG Desktop + Extension -desktop_data_ddg AS ( - SELECT +eligible_markets_dau AS ( + SELECT DISTINCT + "desktop" AS device, submission_date, country, - COUNT(DISTINCT client_id) AS dau, + COUNT(DISTINCT client_id) AS global_eligible_dau, COUNT( DISTINCT IF( ( - (default_search_engine LIKE('%ddg%') OR default_search_engine LIKE('%duckduckgo%')) - AND NOT default_search_engine LIKE('%addon%') - ), - client_id, - NULL - ) - ) AS ddg_dau_w_engine_as_default, - COUNT( - DISTINCT IF( - (engine) IN ('ddg', 'duckduckgo') - AND sap > 0 - AND ( - (default_search_engine LIKE('%ddg%') OR default_search_engine LIKE('%duckduckgo%')) - AND NOT default_search_engine LIKE('%addon%') + (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) + OR (submission_date >= "2023-12-01" AND country 
NOT IN ('RU', 'UA', 'BY', 'CN')) ), client_id, NULL ) - ) AS ddg_dau_engaged_w_sap, - SUM(IF(engine IN ('ddg', 'duckduckgo'), sap, 0)) AS ddg_sap, - SUM(IF(engine IN ('ddg', 'duckduckgo'), tagged_sap, 0)) AS ddg_tagged_sap, - SUM(IF(engine IN ('ddg', 'duckduckgo'), tagged_follow_on, 0)) AS ddg_tagged_follow_on, - SUM(IF(engine IN ('ddg', 'duckduckgo'), search_with_ads, 0)) AS ddg_search_with_ads, - SUM(IF(engine IN ('ddg', 'duckduckgo'), ad_click, 0)) AS ddg_ad_click, - SUM(IF(engine IN ('ddg', 'duckduckgo'), organic, 0)) AS ddg_organic, - SUM(IF(engine IN ('ddg', 'duckduckgo'), ad_click_organic, 0)) AS ddg_ad_click_organic, - SUM( - IF(engine IN ('ddg', 'duckduckgo'), search_with_ads_organic, 0) - ) AS ddg_search_with_ads_organic, - SUM(IF(engine IN ('ddg', 'duckduckgo') AND is_sap_monetizable, sap, 0)) AS ddg_monetizable_sap, - -- in-content probes not available for addon so these metrics although being here will be zero - COUNT( - DISTINCT IF(default_search_engine LIKE('ddg%addon'), client_id, NULL) - ) AS ddgaddon_dau_w_engine_as_default, - COUNT( - DISTINCT IF( - engine = 'ddg-addon' - AND sap > 0 - AND default_search_engine LIKE('ddg%addon'), - client_id, - NULL - ) - ) AS ddgaddon_dau_engaged_w_sap, - SUM(IF(engine IN ('ddg-addon'), sap, 0)) AS ddgaddon_sap, - SUM(IF(engine IN ('ddg-addon'), tagged_sap, 0)) AS ddgaddon_tagged_sap, - SUM(IF(engine IN ('ddg-addon'), tagged_follow_on, 0)) AS ddgaddon_tagged_follow_on, - SUM(IF(engine IN ('ddg-addon'), search_with_ads, 0)) AS ddgaddon_search_with_ads, - SUM(IF(engine IN ('ddg-addon'), ad_click, 0)) AS ddgaddon_ad_click, - SUM(IF(engine IN ('ddg-addon'), organic, 0)) AS ddgaddon_organic, - SUM(IF(engine IN ('ddg-addon'), ad_click_organic, 0)) AS ddgaddon_ad_click_organic, - SUM( - IF(engine IN ('ddg-addon'), search_with_ads_organic, 0) - ) AS ddgaddon_search_with_ads_organic, - SUM(IF(engine IN ('ddg-addon') AND is_sap_monetizable, sap, 0)) AS ddgaddon_monetizable_sap + ) AS google_eligible_dau FROM - 
`moz-fx-data-shared-prod.search.search_clients_engines_sources_daily` + `mozdata.telemetry.desktop_active_users` WHERE submission_date = @submission_date + AND is_dau + # not including Mozilla Online + AND app_name = "Firefox Desktop" GROUP BY + device, submission_date, country - ORDER BY - submission_date, - country -), --- Mobile DAU data -- merging baseline clients to AUA clients -## baseline ping -- mobile default search engine by client id -mobile_baseline_engine AS ( - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine_code AS default_search_engine, - FROM - `moz-fx-data-shared-prod.fenix.baseline` - WHERE - DATE(submission_timestamp) = @submission_date - UNION ALL - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine - FROM - `moz-fx-data-shared-prod.firefox_ios.baseline` - WHERE - DATE(submission_timestamp) = @submission_date - UNION ALL - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.browser_default_search_engine AS default_search_engine - FROM - `moz-fx-data-shared-prod.focus_android.baseline` - WHERE - DATE(submission_timestamp) = @submission_date - UNION ALL - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine - FROM - `moz-fx-data-shared-prod.focus_ios.baseline` - WHERE - DATE(submission_timestamp) = @submission_date -), -## baseline ping search counts -- mobile search counts by client id -mobile_baseline_search AS ( - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine_code AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - 
`moz-fx-data-shared-prod.fenix.baseline`, - UNNEST(metrics.labeled_counter.metrics_search_count) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 - UNION ALL - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - `moz-fx-data-shared-prod.firefox_ios.baseline`, - UNNEST(metrics.labeled_counter.search_counts) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 UNION ALL - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.browser_default_search_engine AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - `moz-fx-data-shared-prod.focus_android.baseline`, - UNNEST(metrics.labeled_counter.browser_search_search_count) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 - UNION ALL - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - `moz-fx-data-shared-prod.focus_ios.baseline`, - UNNEST(metrics.labeled_counter.browser_search_search_count) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 -), -## baseline-powered clients who qualify for KPI (activity filters applied) -mobile_dau_data AS ( SELECT DISTINCT - submission_date, "mobile" AS device, - country, - client_id - FROM - `mozdata.telemetry.mobile_active_users` - WHERE - submission_date = @submission_date - AND is_dau - # 
not including Fenix MozillaOnline, BrowserStack, Klar - AND app_name IN ("Focus iOS", "Firefox iOS", "Fenix", "Focus Android") -), -final_mobile_dau_counts AS ( - SELECT submission_date, country, - COUNT(DISTINCT client_id) AS eligible_dau, + COUNT(DISTINCT client_id) AS global_eligible_dau, COUNT( DISTINCT IF( ( @@ -280,316 +79,79 @@ client_id, NULL ) - ) AS google_eligible_dau, - COUNT( - DISTINCT IF( - default_search_engine LIKE '%google%' - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ), - client_id, - NULL - ) - ) AS google_dau_w_engine_as_default, - COUNT( - DISTINCT IF( - search_count > 0 - AND normalized_engine = 'Google' - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ), - client_id, - NULL - ) - ) AS google_dau_engaged_w_sap, - COUNT( - DISTINCT IF(default_search_engine LIKE '%bing%', client_id, NULL) - ) AS bing_dau_w_engine_as_default, - COUNT( - DISTINCT IF(search_count > 0 AND normalized_engine = 'Bing', client_id, NULL) - ) AS bing_dau_engaged_w_sap, - COUNT( - DISTINCT IF( - default_search_engine LIKE('%ddg%') - OR default_search_engine LIKE('%duckduckgo%'), - client_id, - NULL - ) - ) AS ddg_dau_w_engine_as_default, - COUNT( - DISTINCT IF(normalized_engine = "DuckDuckGo" AND search_count > 0, client_id, NULL) - ) AS ddg_dau_engaged_w_sap + ) AS google_eligible_dau FROM - mobile_dau_data - LEFT JOIN - mobile_baseline_engine - USING (submission_date, client_id) - LEFT JOIN - mobile_baseline_search - USING (submission_date, client_id, default_search_engine) + `mozdata.telemetry.mobile_active_users` + WHERE + submission_date = @submission_date + AND is_dau + # not including Fenix MozillaOnline, BrowserStack, Klar + AND app_name IN ("Focus iOS", "Firefox iOS", "Fenix", "Focus Android") 
GROUP BY + device, submission_date, country ), --- Google Mobile (search only - as mobile search metrics is based on metrics --- ping, while DAU should be based on main ping on Mobile, see also --- https://mozilla-hub.atlassian.net/browse/RS-575) -mobile_data_google AS ( +desktop_mobile_search_dau AS ( SELECT submission_date, + partner, + device, country, - google_eligible_dau, - google_dau_w_engine_as_default, - google_dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Google', sap, 0)) AS sap, - SUM(IF(normalized_engine = 'Google', tagged_sap, 0)) AS tagged_sap, - SUM(IF(normalized_engine = 'Google', tagged_follow_on, 0)) AS tagged_follow_on, - SUM(IF(normalized_engine = 'Google', search_with_ads, 0)) AS search_with_ads, - SUM(IF(normalized_engine = 'Google', ad_click, 0)) AS ad_click, - SUM(IF(normalized_engine = 'Google', organic, 0)) AS organic, - SUM(IF(normalized_engine = 'Google', ad_click_organic, 0)) AS ad_click_organic, - -- metrics do not exist for mobile - 0 AS search_with_ads_organic, - 0 AS monetizable_sap + SUM(dau_w_engine_as_default) AS dau_w_engine_as_default, + SUM(dau_engaged_w_sap) AS dau_engaged_w_sap FROM - `moz-fx-data-shared-prod.search.mobile_search_clients_engines_sources_daily` - INNER JOIN - final_mobile_dau_counts - USING (submission_date, country) + `mozdata.search.search_dau_aggregates` WHERE submission_date = @submission_date - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ) - AND ( - app_name IN ('Fenix', 'Firefox Preview', 'Focus', 'Focus Android Glean', 'Focus iOS Glean') - OR (app_name = 'Fennec' AND os = 'iOS') - ) GROUP BY submission_date, - country, - google_eligible_dau, - google_dau_w_engine_as_default, - google_dau_engaged_w_sap - ORDER BY - submission_date, + partner, + device, country ), --- Bing & DDG Mobile (search only - as mobile search metrics is based on --- metrics ping, while DAU 
should be based on main ping on Mobile, see also --- https://mozilla-hub.atlassian.net/browse/RS-575) -mobile_data_bing_ddg AS ( +combined_search_dau AS ( SELECT submission_date, + partner, + device, country, - eligible_dau, - bing_dau_w_engine_as_default, - bing_dau_engaged_w_sap, - ddg_dau_w_engine_as_default, - ddg_dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Bing', sap, 0)) AS bing_sap, - SUM(IF(normalized_engine = 'Bing', tagged_sap, 0)) AS bing_tagged_sap, - SUM(IF(normalized_engine = 'Bing', tagged_follow_on, 0)) AS bing_tagged_follow_on, - SUM(IF(normalized_engine = 'Bing', search_with_ads, 0)) AS bing_search_with_ads, - SUM(IF(normalized_engine = 'Bing', ad_click, 0)) AS bing_ad_click, - SUM(IF(normalized_engine = 'Bing', organic, 0)) AS bing_organic, - SUM(IF(normalized_engine = 'Bing', ad_click_organic, 0)) AS bing_ad_click_organic, - -- metrics do not exist for mobile - 0 AS bing_search_with_ads_organic, - 0 AS bing_monetizable_sap, - SUM(IF(normalized_engine = 'DuckDuckGo', sap, 0)) AS ddg_sap, - SUM(IF(normalized_engine = 'DuckDuckGo', tagged_sap, 0)) AS ddg_tagged_sap, - SUM(IF(normalized_engine = 'DuckDuckGo', tagged_follow_on, 0)) AS ddg_tagged_follow_on, - SUM(IF(normalized_engine = 'DuckDuckGo', search_with_ads, 0)) AS ddg_search_with_ads, - SUM(IF(normalized_engine = 'DuckDuckGo', ad_click, 0)) AS ddg_ad_click, - SUM(IF(normalized_engine = 'DuckDuckGo', organic, 0)) AS ddg_organic, - SUM(IF(normalized_engine = 'DuckDuckGo', ad_click_organic, 0)) AS ddg_ad_click_organic, - -- metrics do not exist for mobile - 0 AS ddg_search_with_ads_organic, - 0 AS ddg_monetizable_sap + CASE + WHEN partner = "Google" + THEN google_eligible_dau + ELSE global_eligible_dau + END AS dau_eligible_markets, + dau_w_engine_as_default, + dau_engaged_w_sap FROM - `moz-fx-data-shared-prod.search.mobile_search_clients_engines_sources_daily` - INNER JOIN - final_mobile_dau_counts - USING (submission_date, country) - WHERE - submission_date = @submission_date - AND ( - 
app_name IN ('Fenix', 'Firefox Preview', 'Focus', 'Focus Android Glean', 'Focus iOS Glean') - OR (app_name = 'Fennec' AND os = 'iOS') - ) - GROUP BY - submission_date, - country, - eligible_dau, - bing_dau_w_engine_as_default, - bing_dau_engaged_w_sap, - ddg_dau_w_engine_as_default, - ddg_dau_engaged_w_sap - ORDER BY - submission_date, - country + desktop_mobile_search_dau + LEFT JOIN + eligible_markets_dau + USING (submission_date, device, country) ) --- combine all desktop and mobile together SELECT - submission_date, - 'Google' AS partner, - 'desktop' AS device, - channel, - country, - dau, - dau_engaged_w_sap, - sap, - tagged_sap, - tagged_follow_on, - search_with_ads, - ad_click, - organic, - ad_click_organic, - search_with_ads_organic, - monetizable_sap, - dau_w_engine_as_default -FROM - desktop_data_google -UNION ALL -SELECT - submission_date, - 'Bing' AS partner, - 'desktop' AS device, - NULL AS channel, - country, - dau, - dau_engaged_w_sap, - sap, - tagged_sap, - tagged_follow_on, - search_with_ads, - ad_click, - organic, - ad_click_organic, - search_with_ads_organic, - monetizable_sap, - dau_w_engine_as_default -FROM - desktop_data_bing -UNION ALL -SELECT - submission_date, - 'DuckDuckGo' AS partner, - 'desktop' AS device, - NULL AS channel, - country, - dau, - ddg_dau_engaged_w_sap AS dau_engaged_w_sap, - ddg_sap AS sap, - ddg_tagged_sap AS tagged_sap, - ddg_tagged_follow_on AS tagged_follow_on, - ddg_search_with_ads AS search_with_ads, - ddg_ad_click AS ad_click, - ddg_organic AS organic, - ddg_ad_click_organic AS ad_click_organic, - ddg_search_with_ads_organic AS search_with_ads_organic, - ddg_monetizable_sap AS monetizable_sap, - ddg_dau_w_engine_as_default AS dau_w_engine_as_default -FROM - desktop_data_ddg -UNION ALL -SELECT - submission_date, - 'DuckDuckGo' AS partner, - 'extension' AS device, - NULL AS channel, - country, - dau, - ddgaddon_dau_engaged_w_sap AS dau_engaged_w_sap, - ddgaddon_sap AS sap, - ddgaddon_tagged_sap AS tagged_sap, - 
ddgaddon_tagged_follow_on AS tagged_follow_on, - ddgaddon_search_with_ads AS search_with_ads, - ddgaddon_ad_click AS ad_click, - ddgaddon_organic AS organic, - ddgaddon_ad_click_organic AS ad_click_organic, - ddgaddon_search_with_ads_organic AS search_with_ads_organic, - ddgaddon_monetizable_sap AS monetizable_sap, - ddgaddon_dau_w_engine_as_default AS dau_w_engine_as_default -FROM - desktop_data_ddg -UNION ALL -SELECT - submission_date, - 'Google' AS partner, - 'mobile' AS device, - 'n/a' AS channel, - country, - google_eligible_dau AS dau, - google_dau_engaged_w_sap AS dau_engaged_w_sap, - sap, - tagged_sap, - tagged_follow_on, - search_with_ads, - ad_click, - organic, - ad_click_organic, - search_with_ads_organic, - monetizable_sap, - # custom engine bug merged in v121 - # null engine bug merged in v126 - # remove default engine data prior to June 2024 - IF( - submission_date >= "2024-06-01", - google_dau_w_engine_as_default, - NULL - ) AS dau_w_engine_as_default -FROM - mobile_data_google -UNION ALL -SELECT - submission_date, - 'Bing' AS partner, - 'mobile' AS device, - NULL AS channel, - country, - eligible_dau, - bing_dau_engaged_w_sap, - bing_sap, - bing_tagged_sap, - bing_tagged_follow_on, - bing_search_with_ads, - bing_ad_click, - bing_organic, - bing_ad_click_organic, - bing_search_with_ads_organic, - bing_monetizable_sap, - # custom engine bug merged in v121 - # null engine bug merged in v126 - # remove default engine data prior to June 2024 - IF(submission_date >= "2024-06-01", bing_dau_w_engine_as_default, NULL) AS dau_w_engine_as_default -FROM - mobile_data_bing_ddg -UNION ALL -SELECT - submission_date, - 'DuckDuckGo' AS partner, - 'mobile' AS device, - NULL AS channel, - country, - eligible_dau, - ddg_dau_engaged_w_sap, - ddg_sap, - ddg_tagged_sap, - ddg_tagged_follow_on, - ddg_search_with_ads, - ddg_ad_click, - ddg_organic, - ddg_ad_click_organic, - ddg_search_with_ads_organic, - ddg_monetizable_sap, - # custom engine bug merged in v121 - # null 
engine bug merged in v126 - # remove default engine data prior to June 2024 - IF(submission_date >= "2024-06-01", ddg_dau_w_engine_as_default, NULL) AS dau_w_engine_as_default -FROM - mobile_data_bing_ddg + cd.submission_date, + cd.partner, + cd.device, + CAST(NULL AS STRING) AS channel, + cd.country, + du.dau_eligible_markets AS dau, + du.dau_w_engine_as_default, + du.dau_engaged_w_sap, + cd.sap, + cd.tagged_sap, + cd.tagged_follow_on, + cd.search_with_ads, + cd.ad_click, + cd.organic, + cd.ad_click_organic, + cd.search_with_ads_organic, + cd.monetizable_sap +FROM + combined_search_data cd +LEFT JOIN + combined_search_dau du + ON cd.partner = du.partner + AND cd.submission_date = du.submission_date + AND cd.country = du.country + AND cd.device = du.device; ```

Link to full diff

dataops-ci-bot commented 1 month ago

Integration report for "Merge branch 'main' into search_levers_daily_from_aggregates"

sql.diff

Click to expand! ```diff diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_mobile_search.py /tmp/workspace/generated-sql/dags/bqetl_mobile_search.py --- /tmp/workspace/main-generated-sql/dags/bqetl_mobile_search.py 2024-08-26 16:13:36.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_mobile_search.py 2024-08-26 16:14:24.000000000 +0000 @@ -80,6 +80,20 @@ depends_on_past=False, ) + with TaskGroup( + "search_derived__mobile_search_aggregates__v1_external", + ) as search_derived__mobile_search_aggregates__v1_external: + ExternalTaskMarker( + task_id="bqetl_search_dashboard__wait_for_search_derived__mobile_search_aggregates__v1", + external_dag_id="bqetl_search_dashboard", + external_task_id="wait_for_search_derived__mobile_search_aggregates__v1", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=77400)).isoformat() }}", + ) + + search_derived__mobile_search_aggregates__v1_external.set_upstream( + search_derived__mobile_search_aggregates__v1 + ) + search_derived__mobile_search_clients_daily__v1 = bigquery_etl_query( task_id="search_derived__mobile_search_clients_daily__v1", destination_table="mobile_search_clients_daily_v1", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_search_dashboard.py /tmp/workspace/generated-sql/dags/bqetl_search_dashboard.py --- /tmp/workspace/main-generated-sql/dags/bqetl_search_dashboard.py 2024-08-26 16:13:36.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_search_dashboard.py 2024-08-26 16:14:24.000000000 +0000 @@ -275,6 +275,18 @@ pool="DATA_ENG_EXTERNALTASKSENSOR", ) + wait_for_search_derived__mobile_search_aggregates__v1 = ExternalTaskSensor( + task_id="wait_for_search_derived__mobile_search_aggregates__v1", + external_dag_id="bqetl_mobile_search", + external_task_id="search_derived__mobile_search_aggregates__v1", + execution_delta=datetime.timedelta(seconds=9000), + check_existence=True, + mode="reschedule", + 
allowed_states=ALLOWED_STATES, + failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + search_derived__desktop_search_aggregates_by_userstate__v1 = bigquery_etl_query( task_id="search_derived__desktop_search_aggregates_by_userstate__v1", destination_table="desktop_search_aggregates_by_userstate_v1", @@ -482,13 +494,17 @@ ) search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_copy_deduplicate_all + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 ) search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_search_derived__mobile_search_clients_daily__v1 + wait_for_search_derived__mobile_search_aggregates__v1 ) search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_search_derived__search_clients_daily__v8 + wait_for_search_derived__search_aggregates__v8 + ) + + search_derived__search_revenue_levers_daily__v1.set_upstream( + search_derived__search_dau_aggregates__v1 ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql 2024-08-26 16:08:24.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql 2024-08-26 16:08:31.000000000 +0000 @@ -1,276 +1,75 @@ -WITH --- Google Desktop (search + DAU) -desktop_data_google AS ( +WITH combined_search_data AS ( SELECT submission_date, - IF(LOWER(channel) LIKE '%esr%', 'ESR', 'personal') AS channel, country, - COUNT(DISTINCT client_id) AS dau, - COUNT( - DISTINCT IF(default_search_engine LIKE '%google%', client_id, NULL) - ) AS dau_w_engine_as_default, - COUNT( - DISTINCT IF( - sap > 0 - AND normalized_engine = 'Google' - AND 
default_search_engine LIKE '%google%', - client_id, - NULL - ) - ) AS dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Google', sap, 0)) AS sap, - SUM(IF(normalized_engine = 'Google', tagged_sap, 0)) AS tagged_sap, - SUM(IF(normalized_engine = 'Google', tagged_follow_on, 0)) AS tagged_follow_on, - SUM(IF(normalized_engine = 'Google', search_with_ads, 0)) AS search_with_ads, - SUM(IF(normalized_engine = 'Google', ad_click, 0)) AS ad_click, - SUM(IF(normalized_engine = 'Google', organic, 0)) AS organic, - SUM(IF(normalized_engine = 'Google', ad_click_organic, 0)) AS ad_click_organic, - SUM(IF(normalized_engine = 'Google', search_with_ads_organic, 0)) AS search_with_ads_organic, - SUM(IF(normalized_engine = 'Google' AND is_sap_monetizable, sap, 0)) AS monetizable_sap + normalized_engine AS partner, + 'desktop' AS device, + sap, + tagged_sap, + tagged_follow_on, + search_with_ads, + ad_click, + organic, + ad_click_organic, + search_with_ads_organic, + IF(is_sap_monetizable, sap, 0) AS monetizable_sap FROM - `moz-fx-data-shared-prod.search.search_clients_engines_sources_daily` + `moz-fx-data-shared-prod.search.search_aggregates` WHERE submission_date = @submission_date - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ) - GROUP BY - submission_date, - channel, - country - ORDER BY - submission_date, - channel, - country -), --- Bing Desktop (non-Acer) -desktop_data_bing AS ( + UNION ALL SELECT submission_date, country, - COUNT(DISTINCT client_id) AS dau, - COUNT( - DISTINCT IF(default_search_engine LIKE '%bing%', client_id, NULL) - ) AS dau_w_engine_as_default, - COUNT( - DISTINCT IF( - sap > 0 - AND normalized_engine = 'Bing' - AND default_search_engine LIKE '%bing%', - client_id, - NULL - ) - ) AS dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Bing', sap, 0)) AS sap, - SUM(IF(normalized_engine = 'Bing', tagged_sap, 0)) AS tagged_sap, 
- SUM(IF(normalized_engine = 'Bing', tagged_follow_on, 0)) AS tagged_follow_on, - SUM(IF(normalized_engine = 'Bing', search_with_ads, 0)) AS search_with_ads, - SUM(IF(normalized_engine = 'Bing', ad_click, 0)) AS ad_click, - SUM(IF(normalized_engine = 'Bing', organic, 0)) AS organic, - SUM(IF(normalized_engine = 'Bing', ad_click_organic, 0)) AS ad_click_organic, - SUM(IF(normalized_engine = 'Bing', search_with_ads_organic, 0)) AS search_with_ads_organic, - SUM(IF(normalized_engine = 'Bing' AND is_sap_monetizable, sap, 0)) AS monetizable_sap + normalized_engine AS partner, + 'mobile' AS device, + sap, + tagged_sap, + tagged_follow_on, + search_with_ads, + ad_click, + organic, + ad_click_organic, + 0 AS search_with_ads_organic, + 0 AS monetizable_sap FROM - `moz-fx-data-shared-prod.search.search_clients_engines_sources_daily` + `moz-fx-data-shared-prod.search.mobile_search_aggregates` WHERE submission_date = @submission_date - AND (distribution_id IS NULL OR distribution_id NOT LIKE '%acer%') - AND client_id NOT IN (SELECT client_id FROM `moz-fx-data-shared-prod.search.acer_cohort`) - GROUP BY - submission_date, - country - ORDER BY - submission_date, - country ), --- DDG Desktop + Extension -desktop_data_ddg AS ( - SELECT +eligible_markets_dau AS ( + SELECT DISTINCT + "desktop" AS device, submission_date, country, - COUNT(DISTINCT client_id) AS dau, + COUNT(DISTINCT client_id) AS global_eligible_dau, COUNT( DISTINCT IF( ( - (default_search_engine LIKE('%ddg%') OR default_search_engine LIKE('%duckduckgo%')) - AND NOT default_search_engine LIKE('%addon%') - ), - client_id, - NULL - ) - ) AS ddg_dau_w_engine_as_default, - COUNT( - DISTINCT IF( - (engine) IN ('ddg', 'duckduckgo') - AND sap > 0 - AND ( - (default_search_engine LIKE('%ddg%') OR default_search_engine LIKE('%duckduckgo%')) - AND NOT default_search_engine LIKE('%addon%') + (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) + OR (submission_date >= "2023-12-01" AND country 
NOT IN ('RU', 'UA', 'BY', 'CN')) ), client_id, NULL ) - ) AS ddg_dau_engaged_w_sap, - SUM(IF(engine IN ('ddg', 'duckduckgo'), sap, 0)) AS ddg_sap, - SUM(IF(engine IN ('ddg', 'duckduckgo'), tagged_sap, 0)) AS ddg_tagged_sap, - SUM(IF(engine IN ('ddg', 'duckduckgo'), tagged_follow_on, 0)) AS ddg_tagged_follow_on, - SUM(IF(engine IN ('ddg', 'duckduckgo'), search_with_ads, 0)) AS ddg_search_with_ads, - SUM(IF(engine IN ('ddg', 'duckduckgo'), ad_click, 0)) AS ddg_ad_click, - SUM(IF(engine IN ('ddg', 'duckduckgo'), organic, 0)) AS ddg_organic, - SUM(IF(engine IN ('ddg', 'duckduckgo'), ad_click_organic, 0)) AS ddg_ad_click_organic, - SUM( - IF(engine IN ('ddg', 'duckduckgo'), search_with_ads_organic, 0) - ) AS ddg_search_with_ads_organic, - SUM(IF(engine IN ('ddg', 'duckduckgo') AND is_sap_monetizable, sap, 0)) AS ddg_monetizable_sap, - -- in-content probes not available for addon so these metrics although being here will be zero - COUNT( - DISTINCT IF(default_search_engine LIKE('ddg%addon'), client_id, NULL) - ) AS ddgaddon_dau_w_engine_as_default, - COUNT( - DISTINCT IF( - engine = 'ddg-addon' - AND sap > 0 - AND default_search_engine LIKE('ddg%addon'), - client_id, - NULL - ) - ) AS ddgaddon_dau_engaged_w_sap, - SUM(IF(engine IN ('ddg-addon'), sap, 0)) AS ddgaddon_sap, - SUM(IF(engine IN ('ddg-addon'), tagged_sap, 0)) AS ddgaddon_tagged_sap, - SUM(IF(engine IN ('ddg-addon'), tagged_follow_on, 0)) AS ddgaddon_tagged_follow_on, - SUM(IF(engine IN ('ddg-addon'), search_with_ads, 0)) AS ddgaddon_search_with_ads, - SUM(IF(engine IN ('ddg-addon'), ad_click, 0)) AS ddgaddon_ad_click, - SUM(IF(engine IN ('ddg-addon'), organic, 0)) AS ddgaddon_organic, - SUM(IF(engine IN ('ddg-addon'), ad_click_organic, 0)) AS ddgaddon_ad_click_organic, - SUM( - IF(engine IN ('ddg-addon'), search_with_ads_organic, 0) - ) AS ddgaddon_search_with_ads_organic, - SUM(IF(engine IN ('ddg-addon') AND is_sap_monetizable, sap, 0)) AS ddgaddon_monetizable_sap + ) AS google_eligible_dau FROM - 
`moz-fx-data-shared-prod.search.search_clients_engines_sources_daily` + `mozdata.telemetry.desktop_active_users` WHERE submission_date = @submission_date + AND is_dau + # not including Mozilla Online + AND app_name = "Firefox Desktop" GROUP BY + device, submission_date, country - ORDER BY - submission_date, - country -), --- Mobile DAU data -- merging baseline clients to AUA clients -## baseline ping -- mobile default search engine by client id -mobile_baseline_engine AS ( - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine_code AS default_search_engine, - FROM - `moz-fx-data-shared-prod.fenix.baseline` - WHERE - DATE(submission_timestamp) = @submission_date - UNION ALL - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine - FROM - `moz-fx-data-shared-prod.firefox_ios.baseline` - WHERE - DATE(submission_timestamp) = @submission_date - UNION ALL - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.browser_default_search_engine AS default_search_engine - FROM - `moz-fx-data-shared-prod.focus_android.baseline` - WHERE - DATE(submission_timestamp) = @submission_date - UNION ALL - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine - FROM - `moz-fx-data-shared-prod.focus_ios.baseline` - WHERE - DATE(submission_timestamp) = @submission_date -), -## baseline ping search counts -- mobile search counts by client id -mobile_baseline_search AS ( - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine_code AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - 
`moz-fx-data-shared-prod.fenix.baseline`, - UNNEST(metrics.labeled_counter.metrics_search_count) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 - UNION ALL - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - `moz-fx-data-shared-prod.firefox_ios.baseline`, - UNNEST(metrics.labeled_counter.search_counts) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 UNION ALL - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.browser_default_search_engine AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - `moz-fx-data-shared-prod.focus_android.baseline`, - UNNEST(metrics.labeled_counter.browser_search_search_count) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 - UNION ALL - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - `moz-fx-data-shared-prod.focus_ios.baseline`, - UNNEST(metrics.labeled_counter.browser_search_search_count) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 -), -## baseline-powered clients who qualify for KPI (activity filters applied) -mobile_dau_data AS ( SELECT DISTINCT - submission_date, "mobile" AS device, - country, - client_id - FROM - `mozdata.telemetry.mobile_active_users` - WHERE - submission_date = @submission_date - AND is_dau - # 
not including Fenix MozillaOnline, BrowserStack, Klar - AND app_name IN ("Focus iOS", "Firefox iOS", "Fenix", "Focus Android") -), -final_mobile_dau_counts AS ( - SELECT submission_date, country, - COUNT(DISTINCT client_id) AS eligible_dau, + COUNT(DISTINCT client_id) AS global_eligible_dau, COUNT( DISTINCT IF( ( @@ -280,316 +79,79 @@ client_id, NULL ) - ) AS google_eligible_dau, - COUNT( - DISTINCT IF( - default_search_engine LIKE '%google%' - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ), - client_id, - NULL - ) - ) AS google_dau_w_engine_as_default, - COUNT( - DISTINCT IF( - search_count > 0 - AND normalized_engine = 'Google' - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ), - client_id, - NULL - ) - ) AS google_dau_engaged_w_sap, - COUNT( - DISTINCT IF(default_search_engine LIKE '%bing%', client_id, NULL) - ) AS bing_dau_w_engine_as_default, - COUNT( - DISTINCT IF(search_count > 0 AND normalized_engine = 'Bing', client_id, NULL) - ) AS bing_dau_engaged_w_sap, - COUNT( - DISTINCT IF( - default_search_engine LIKE('%ddg%') - OR default_search_engine LIKE('%duckduckgo%'), - client_id, - NULL - ) - ) AS ddg_dau_w_engine_as_default, - COUNT( - DISTINCT IF(normalized_engine = "DuckDuckGo" AND search_count > 0, client_id, NULL) - ) AS ddg_dau_engaged_w_sap + ) AS google_eligible_dau FROM - mobile_dau_data - LEFT JOIN - mobile_baseline_engine - USING (submission_date, client_id) - LEFT JOIN - mobile_baseline_search - USING (submission_date, client_id, default_search_engine) + `mozdata.telemetry.mobile_active_users` + WHERE + submission_date = @submission_date + AND is_dau + # not including Fenix MozillaOnline, BrowserStack, Klar + AND app_name IN ("Focus iOS", "Firefox iOS", "Fenix", "Focus Android") 
GROUP BY + device, submission_date, country ), --- Google Mobile (search only - as mobile search metrics is based on metrics --- ping, while DAU should be based on main ping on Mobile, see also --- https://mozilla-hub.atlassian.net/browse/RS-575) -mobile_data_google AS ( +desktop_mobile_search_dau AS ( SELECT submission_date, + partner, + device, country, - google_eligible_dau, - google_dau_w_engine_as_default, - google_dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Google', sap, 0)) AS sap, - SUM(IF(normalized_engine = 'Google', tagged_sap, 0)) AS tagged_sap, - SUM(IF(normalized_engine = 'Google', tagged_follow_on, 0)) AS tagged_follow_on, - SUM(IF(normalized_engine = 'Google', search_with_ads, 0)) AS search_with_ads, - SUM(IF(normalized_engine = 'Google', ad_click, 0)) AS ad_click, - SUM(IF(normalized_engine = 'Google', organic, 0)) AS organic, - SUM(IF(normalized_engine = 'Google', ad_click_organic, 0)) AS ad_click_organic, - -- metrics do not exist for mobile - 0 AS search_with_ads_organic, - 0 AS monetizable_sap + SUM(dau_w_engine_as_default) AS dau_w_engine_as_default, + SUM(dau_engaged_w_sap) AS dau_engaged_w_sap FROM - `moz-fx-data-shared-prod.search.mobile_search_clients_engines_sources_daily` - INNER JOIN - final_mobile_dau_counts - USING (submission_date, country) + `mozdata.search.search_dau_aggregates` WHERE submission_date = @submission_date - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ) - AND ( - app_name IN ('Fenix', 'Firefox Preview', 'Focus', 'Focus Android Glean', 'Focus iOS Glean') - OR (app_name = 'Fennec' AND os = 'iOS') - ) GROUP BY submission_date, - country, - google_eligible_dau, - google_dau_w_engine_as_default, - google_dau_engaged_w_sap - ORDER BY - submission_date, + partner, + device, country ), --- Bing & DDG Mobile (search only - as mobile search metrics is based on --- metrics ping, while DAU 
should be based on main ping on Mobile, see also --- https://mozilla-hub.atlassian.net/browse/RS-575) -mobile_data_bing_ddg AS ( +combined_search_dau AS ( SELECT submission_date, + partner, + device, country, - eligible_dau, - bing_dau_w_engine_as_default, - bing_dau_engaged_w_sap, - ddg_dau_w_engine_as_default, - ddg_dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Bing', sap, 0)) AS bing_sap, - SUM(IF(normalized_engine = 'Bing', tagged_sap, 0)) AS bing_tagged_sap, - SUM(IF(normalized_engine = 'Bing', tagged_follow_on, 0)) AS bing_tagged_follow_on, - SUM(IF(normalized_engine = 'Bing', search_with_ads, 0)) AS bing_search_with_ads, - SUM(IF(normalized_engine = 'Bing', ad_click, 0)) AS bing_ad_click, - SUM(IF(normalized_engine = 'Bing', organic, 0)) AS bing_organic, - SUM(IF(normalized_engine = 'Bing', ad_click_organic, 0)) AS bing_ad_click_organic, - -- metrics do not exist for mobile - 0 AS bing_search_with_ads_organic, - 0 AS bing_monetizable_sap, - SUM(IF(normalized_engine = 'DuckDuckGo', sap, 0)) AS ddg_sap, - SUM(IF(normalized_engine = 'DuckDuckGo', tagged_sap, 0)) AS ddg_tagged_sap, - SUM(IF(normalized_engine = 'DuckDuckGo', tagged_follow_on, 0)) AS ddg_tagged_follow_on, - SUM(IF(normalized_engine = 'DuckDuckGo', search_with_ads, 0)) AS ddg_search_with_ads, - SUM(IF(normalized_engine = 'DuckDuckGo', ad_click, 0)) AS ddg_ad_click, - SUM(IF(normalized_engine = 'DuckDuckGo', organic, 0)) AS ddg_organic, - SUM(IF(normalized_engine = 'DuckDuckGo', ad_click_organic, 0)) AS ddg_ad_click_organic, - -- metrics do not exist for mobile - 0 AS ddg_search_with_ads_organic, - 0 AS ddg_monetizable_sap + CASE + WHEN partner = "Google" + THEN google_eligible_dau + ELSE global_eligible_dau + END AS dau_eligible_markets, + dau_w_engine_as_default, + dau_engaged_w_sap FROM - `moz-fx-data-shared-prod.search.mobile_search_clients_engines_sources_daily` - INNER JOIN - final_mobile_dau_counts - USING (submission_date, country) - WHERE - submission_date = @submission_date - AND ( - 
app_name IN ('Fenix', 'Firefox Preview', 'Focus', 'Focus Android Glean', 'Focus iOS Glean') - OR (app_name = 'Fennec' AND os = 'iOS') - ) - GROUP BY - submission_date, - country, - eligible_dau, - bing_dau_w_engine_as_default, - bing_dau_engaged_w_sap, - ddg_dau_w_engine_as_default, - ddg_dau_engaged_w_sap - ORDER BY - submission_date, - country + desktop_mobile_search_dau + LEFT JOIN + eligible_markets_dau + USING (submission_date, device, country) ) --- combine all desktop and mobile together SELECT - submission_date, - 'Google' AS partner, - 'desktop' AS device, - channel, - country, - dau, - dau_engaged_w_sap, - sap, - tagged_sap, - tagged_follow_on, - search_with_ads, - ad_click, - organic, - ad_click_organic, - search_with_ads_organic, - monetizable_sap, - dau_w_engine_as_default -FROM - desktop_data_google -UNION ALL -SELECT - submission_date, - 'Bing' AS partner, - 'desktop' AS device, - NULL AS channel, - country, - dau, - dau_engaged_w_sap, - sap, - tagged_sap, - tagged_follow_on, - search_with_ads, - ad_click, - organic, - ad_click_organic, - search_with_ads_organic, - monetizable_sap, - dau_w_engine_as_default -FROM - desktop_data_bing -UNION ALL -SELECT - submission_date, - 'DuckDuckGo' AS partner, - 'desktop' AS device, - NULL AS channel, - country, - dau, - ddg_dau_engaged_w_sap AS dau_engaged_w_sap, - ddg_sap AS sap, - ddg_tagged_sap AS tagged_sap, - ddg_tagged_follow_on AS tagged_follow_on, - ddg_search_with_ads AS search_with_ads, - ddg_ad_click AS ad_click, - ddg_organic AS organic, - ddg_ad_click_organic AS ad_click_organic, - ddg_search_with_ads_organic AS search_with_ads_organic, - ddg_monetizable_sap AS monetizable_sap, - ddg_dau_w_engine_as_default AS dau_w_engine_as_default -FROM - desktop_data_ddg -UNION ALL -SELECT - submission_date, - 'DuckDuckGo' AS partner, - 'extension' AS device, - NULL AS channel, - country, - dau, - ddgaddon_dau_engaged_w_sap AS dau_engaged_w_sap, - ddgaddon_sap AS sap, - ddgaddon_tagged_sap AS tagged_sap, - 
ddgaddon_tagged_follow_on AS tagged_follow_on, - ddgaddon_search_with_ads AS search_with_ads, - ddgaddon_ad_click AS ad_click, - ddgaddon_organic AS organic, - ddgaddon_ad_click_organic AS ad_click_organic, - ddgaddon_search_with_ads_organic AS search_with_ads_organic, - ddgaddon_monetizable_sap AS monetizable_sap, - ddgaddon_dau_w_engine_as_default AS dau_w_engine_as_default -FROM - desktop_data_ddg -UNION ALL -SELECT - submission_date, - 'Google' AS partner, - 'mobile' AS device, - 'n/a' AS channel, - country, - google_eligible_dau AS dau, - google_dau_engaged_w_sap AS dau_engaged_w_sap, - sap, - tagged_sap, - tagged_follow_on, - search_with_ads, - ad_click, - organic, - ad_click_organic, - search_with_ads_organic, - monetizable_sap, - # custom engine bug merged in v121 - # null engine bug merged in v126 - # remove default engine data prior to June 2024 - IF( - submission_date >= "2024-06-01", - google_dau_w_engine_as_default, - NULL - ) AS dau_w_engine_as_default -FROM - mobile_data_google -UNION ALL -SELECT - submission_date, - 'Bing' AS partner, - 'mobile' AS device, - NULL AS channel, - country, - eligible_dau, - bing_dau_engaged_w_sap, - bing_sap, - bing_tagged_sap, - bing_tagged_follow_on, - bing_search_with_ads, - bing_ad_click, - bing_organic, - bing_ad_click_organic, - bing_search_with_ads_organic, - bing_monetizable_sap, - # custom engine bug merged in v121 - # null engine bug merged in v126 - # remove default engine data prior to June 2024 - IF(submission_date >= "2024-06-01", bing_dau_w_engine_as_default, NULL) AS dau_w_engine_as_default -FROM - mobile_data_bing_ddg -UNION ALL -SELECT - submission_date, - 'DuckDuckGo' AS partner, - 'mobile' AS device, - NULL AS channel, - country, - eligible_dau, - ddg_dau_engaged_w_sap, - ddg_sap, - ddg_tagged_sap, - ddg_tagged_follow_on, - ddg_search_with_ads, - ddg_ad_click, - ddg_organic, - ddg_ad_click_organic, - ddg_search_with_ads_organic, - ddg_monetizable_sap, - # custom engine bug merged in v121 - # null 
engine bug merged in v126 - # remove default engine data prior to June 2024 - IF(submission_date >= "2024-06-01", ddg_dau_w_engine_as_default, NULL) AS dau_w_engine_as_default -FROM - mobile_data_bing_ddg + cd.submission_date, + cd.partner, + cd.device, + CAST(NULL AS STRING) AS channel, + cd.country, + du.dau_eligible_markets AS dau, + du.dau_w_engine_as_default, + du.dau_engaged_w_sap, + cd.sap, + cd.tagged_sap, + cd.tagged_follow_on, + cd.search_with_ads, + cd.ad_click, + cd.organic, + cd.ad_click_organic, + cd.search_with_ads_organic, + cd.monetizable_sap +FROM + combined_search_data cd +LEFT JOIN + combined_search_dau du + ON cd.partner = du.partner + AND cd.submission_date = du.submission_date + AND cd.country = du.country + AND cd.device = du.device; ```

Link to full diff

dataops-ci-bot commented 1 month ago

Integration report for "Merge branch 'main' into search_levers_daily_from_aggregates"

sql.diff

Click to expand! ```diff diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_mobile_search.py /tmp/workspace/generated-sql/dags/bqetl_mobile_search.py --- /tmp/workspace/main-generated-sql/dags/bqetl_mobile_search.py 2024-08-27 17:28:13.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_mobile_search.py 2024-08-27 17:29:27.000000000 +0000 @@ -80,6 +80,20 @@ depends_on_past=False, ) + with TaskGroup( + "search_derived__mobile_search_aggregates__v1_external", + ) as search_derived__mobile_search_aggregates__v1_external: + ExternalTaskMarker( + task_id="bqetl_search_dashboard__wait_for_search_derived__mobile_search_aggregates__v1", + external_dag_id="bqetl_search_dashboard", + external_task_id="wait_for_search_derived__mobile_search_aggregates__v1", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=77400)).isoformat() }}", + ) + + search_derived__mobile_search_aggregates__v1_external.set_upstream( + search_derived__mobile_search_aggregates__v1 + ) + search_derived__mobile_search_clients_daily__v1 = bigquery_etl_query( task_id="search_derived__mobile_search_clients_daily__v1", destination_table="mobile_search_clients_daily_v1", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_search_dashboard.py /tmp/workspace/generated-sql/dags/bqetl_search_dashboard.py --- /tmp/workspace/main-generated-sql/dags/bqetl_search_dashboard.py 2024-08-27 17:28:13.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_search_dashboard.py 2024-08-27 17:29:28.000000000 +0000 @@ -275,6 +275,18 @@ pool="DATA_ENG_EXTERNALTASKSENSOR", ) + wait_for_search_derived__mobile_search_aggregates__v1 = ExternalTaskSensor( + task_id="wait_for_search_derived__mobile_search_aggregates__v1", + external_dag_id="bqetl_mobile_search", + external_task_id="search_derived__mobile_search_aggregates__v1", + execution_delta=datetime.timedelta(seconds=9000), + check_existence=True, + mode="reschedule", + 
allowed_states=ALLOWED_STATES, + failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + search_derived__desktop_search_aggregates_by_userstate__v1 = bigquery_etl_query( task_id="search_derived__desktop_search_aggregates_by_userstate__v1", destination_table="desktop_search_aggregates_by_userstate_v1", @@ -482,13 +494,17 @@ ) search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_copy_deduplicate_all + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 ) search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_search_derived__mobile_search_clients_daily__v1 + wait_for_search_derived__mobile_search_aggregates__v1 ) search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_search_derived__search_clients_daily__v8 + wait_for_search_derived__search_aggregates__v8 + ) + + search_derived__search_revenue_levers_daily__v1.set_upstream( + search_derived__search_dau_aggregates__v1 ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql 2024-08-27 17:23:16.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql 2024-08-27 17:23:20.000000000 +0000 @@ -1,276 +1,75 @@ -WITH --- Google Desktop (search + DAU) -desktop_data_google AS ( +WITH combined_search_data AS ( SELECT submission_date, - IF(LOWER(channel) LIKE '%esr%', 'ESR', 'personal') AS channel, country, - COUNT(DISTINCT client_id) AS dau, - COUNT( - DISTINCT IF(default_search_engine LIKE '%google%', client_id, NULL) - ) AS dau_w_engine_as_default, - COUNT( - DISTINCT IF( - sap > 0 - AND normalized_engine = 'Google' - AND 
default_search_engine LIKE '%google%', - client_id, - NULL - ) - ) AS dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Google', sap, 0)) AS sap, - SUM(IF(normalized_engine = 'Google', tagged_sap, 0)) AS tagged_sap, - SUM(IF(normalized_engine = 'Google', tagged_follow_on, 0)) AS tagged_follow_on, - SUM(IF(normalized_engine = 'Google', search_with_ads, 0)) AS search_with_ads, - SUM(IF(normalized_engine = 'Google', ad_click, 0)) AS ad_click, - SUM(IF(normalized_engine = 'Google', organic, 0)) AS organic, - SUM(IF(normalized_engine = 'Google', ad_click_organic, 0)) AS ad_click_organic, - SUM(IF(normalized_engine = 'Google', search_with_ads_organic, 0)) AS search_with_ads_organic, - SUM(IF(normalized_engine = 'Google' AND is_sap_monetizable, sap, 0)) AS monetizable_sap + normalized_engine AS partner, + 'desktop' AS device, + sap, + tagged_sap, + tagged_follow_on, + search_with_ads, + ad_click, + organic, + ad_click_organic, + search_with_ads_organic, + IF(is_sap_monetizable, sap, 0) AS monetizable_sap FROM - `moz-fx-data-shared-prod.search.search_clients_engines_sources_daily` + `moz-fx-data-shared-prod.search.search_aggregates` WHERE submission_date = @submission_date - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ) - GROUP BY - submission_date, - channel, - country - ORDER BY - submission_date, - channel, - country -), --- Bing Desktop (non-Acer) -desktop_data_bing AS ( + UNION ALL SELECT submission_date, country, - COUNT(DISTINCT client_id) AS dau, - COUNT( - DISTINCT IF(default_search_engine LIKE '%bing%', client_id, NULL) - ) AS dau_w_engine_as_default, - COUNT( - DISTINCT IF( - sap > 0 - AND normalized_engine = 'Bing' - AND default_search_engine LIKE '%bing%', - client_id, - NULL - ) - ) AS dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Bing', sap, 0)) AS sap, - SUM(IF(normalized_engine = 'Bing', tagged_sap, 0)) AS tagged_sap, 
- SUM(IF(normalized_engine = 'Bing', tagged_follow_on, 0)) AS tagged_follow_on, - SUM(IF(normalized_engine = 'Bing', search_with_ads, 0)) AS search_with_ads, - SUM(IF(normalized_engine = 'Bing', ad_click, 0)) AS ad_click, - SUM(IF(normalized_engine = 'Bing', organic, 0)) AS organic, - SUM(IF(normalized_engine = 'Bing', ad_click_organic, 0)) AS ad_click_organic, - SUM(IF(normalized_engine = 'Bing', search_with_ads_organic, 0)) AS search_with_ads_organic, - SUM(IF(normalized_engine = 'Bing' AND is_sap_monetizable, sap, 0)) AS monetizable_sap + normalized_engine AS partner, + 'mobile' AS device, + sap, + tagged_sap, + tagged_follow_on, + search_with_ads, + ad_click, + organic, + ad_click_organic, + 0 AS search_with_ads_organic, + 0 AS monetizable_sap FROM - `moz-fx-data-shared-prod.search.search_clients_engines_sources_daily` + `moz-fx-data-shared-prod.search.mobile_search_aggregates` WHERE submission_date = @submission_date - AND (distribution_id IS NULL OR distribution_id NOT LIKE '%acer%') - AND client_id NOT IN (SELECT client_id FROM `moz-fx-data-shared-prod.search.acer_cohort`) - GROUP BY - submission_date, - country - ORDER BY - submission_date, - country ), --- DDG Desktop + Extension -desktop_data_ddg AS ( - SELECT +eligible_markets_dau AS ( + SELECT DISTINCT + "desktop" AS device, submission_date, country, - COUNT(DISTINCT client_id) AS dau, + COUNT(DISTINCT client_id) AS global_eligible_dau, COUNT( DISTINCT IF( ( - (default_search_engine LIKE('%ddg%') OR default_search_engine LIKE('%duckduckgo%')) - AND NOT default_search_engine LIKE('%addon%') - ), - client_id, - NULL - ) - ) AS ddg_dau_w_engine_as_default, - COUNT( - DISTINCT IF( - (engine) IN ('ddg', 'duckduckgo') - AND sap > 0 - AND ( - (default_search_engine LIKE('%ddg%') OR default_search_engine LIKE('%duckduckgo%')) - AND NOT default_search_engine LIKE('%addon%') + (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) + OR (submission_date >= "2023-12-01" AND country 
NOT IN ('RU', 'UA', 'BY', 'CN')) ), client_id, NULL ) - ) AS ddg_dau_engaged_w_sap, - SUM(IF(engine IN ('ddg', 'duckduckgo'), sap, 0)) AS ddg_sap, - SUM(IF(engine IN ('ddg', 'duckduckgo'), tagged_sap, 0)) AS ddg_tagged_sap, - SUM(IF(engine IN ('ddg', 'duckduckgo'), tagged_follow_on, 0)) AS ddg_tagged_follow_on, - SUM(IF(engine IN ('ddg', 'duckduckgo'), search_with_ads, 0)) AS ddg_search_with_ads, - SUM(IF(engine IN ('ddg', 'duckduckgo'), ad_click, 0)) AS ddg_ad_click, - SUM(IF(engine IN ('ddg', 'duckduckgo'), organic, 0)) AS ddg_organic, - SUM(IF(engine IN ('ddg', 'duckduckgo'), ad_click_organic, 0)) AS ddg_ad_click_organic, - SUM( - IF(engine IN ('ddg', 'duckduckgo'), search_with_ads_organic, 0) - ) AS ddg_search_with_ads_organic, - SUM(IF(engine IN ('ddg', 'duckduckgo') AND is_sap_monetizable, sap, 0)) AS ddg_monetizable_sap, - -- in-content probes not available for addon so these metrics although being here will be zero - COUNT( - DISTINCT IF(default_search_engine LIKE('ddg%addon'), client_id, NULL) - ) AS ddgaddon_dau_w_engine_as_default, - COUNT( - DISTINCT IF( - engine = 'ddg-addon' - AND sap > 0 - AND default_search_engine LIKE('ddg%addon'), - client_id, - NULL - ) - ) AS ddgaddon_dau_engaged_w_sap, - SUM(IF(engine IN ('ddg-addon'), sap, 0)) AS ddgaddon_sap, - SUM(IF(engine IN ('ddg-addon'), tagged_sap, 0)) AS ddgaddon_tagged_sap, - SUM(IF(engine IN ('ddg-addon'), tagged_follow_on, 0)) AS ddgaddon_tagged_follow_on, - SUM(IF(engine IN ('ddg-addon'), search_with_ads, 0)) AS ddgaddon_search_with_ads, - SUM(IF(engine IN ('ddg-addon'), ad_click, 0)) AS ddgaddon_ad_click, - SUM(IF(engine IN ('ddg-addon'), organic, 0)) AS ddgaddon_organic, - SUM(IF(engine IN ('ddg-addon'), ad_click_organic, 0)) AS ddgaddon_ad_click_organic, - SUM( - IF(engine IN ('ddg-addon'), search_with_ads_organic, 0) - ) AS ddgaddon_search_with_ads_organic, - SUM(IF(engine IN ('ddg-addon') AND is_sap_monetizable, sap, 0)) AS ddgaddon_monetizable_sap + ) AS google_eligible_dau FROM - 
`moz-fx-data-shared-prod.search.search_clients_engines_sources_daily` + `mozdata.telemetry.desktop_active_users` WHERE submission_date = @submission_date + AND is_dau + # not including Mozilla Online + AND app_name = "Firefox Desktop" GROUP BY + device, submission_date, country - ORDER BY - submission_date, - country -), --- Mobile DAU data -- merging baseline clients to AUA clients -## baseline ping -- mobile default search engine by client id -mobile_baseline_engine AS ( - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine_code AS default_search_engine, - FROM - `moz-fx-data-shared-prod.fenix.baseline` - WHERE - DATE(submission_timestamp) = @submission_date - UNION ALL - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine - FROM - `moz-fx-data-shared-prod.firefox_ios.baseline` - WHERE - DATE(submission_timestamp) = @submission_date - UNION ALL - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.browser_default_search_engine AS default_search_engine - FROM - `moz-fx-data-shared-prod.focus_android.baseline` - WHERE - DATE(submission_timestamp) = @submission_date - UNION ALL - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine - FROM - `moz-fx-data-shared-prod.focus_ios.baseline` - WHERE - DATE(submission_timestamp) = @submission_date -), -## baseline ping search counts -- mobile search counts by client id -mobile_baseline_search AS ( - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine_code AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - 
`moz-fx-data-shared-prod.fenix.baseline`, - UNNEST(metrics.labeled_counter.metrics_search_count) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 - UNION ALL - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - `moz-fx-data-shared-prod.firefox_ios.baseline`, - UNNEST(metrics.labeled_counter.search_counts) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 UNION ALL - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.browser_default_search_engine AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - `moz-fx-data-shared-prod.focus_android.baseline`, - UNNEST(metrics.labeled_counter.browser_search_search_count) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 - UNION ALL - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - `moz-fx-data-shared-prod.focus_ios.baseline`, - UNNEST(metrics.labeled_counter.browser_search_search_count) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 -), -## baseline-powered clients who qualify for KPI (activity filters applied) -mobile_dau_data AS ( SELECT DISTINCT - submission_date, "mobile" AS device, - country, - client_id - FROM - `mozdata.telemetry.mobile_active_users` - WHERE - submission_date = @submission_date - AND is_dau - # 
not including Fenix MozillaOnline, BrowserStack, Klar - AND app_name IN ("Focus iOS", "Firefox iOS", "Fenix", "Focus Android") -), -final_mobile_dau_counts AS ( - SELECT submission_date, country, - COUNT(DISTINCT client_id) AS eligible_dau, + COUNT(DISTINCT client_id) AS global_eligible_dau, COUNT( DISTINCT IF( ( @@ -280,316 +79,79 @@ client_id, NULL ) - ) AS google_eligible_dau, - COUNT( - DISTINCT IF( - default_search_engine LIKE '%google%' - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ), - client_id, - NULL - ) - ) AS google_dau_w_engine_as_default, - COUNT( - DISTINCT IF( - search_count > 0 - AND normalized_engine = 'Google' - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ), - client_id, - NULL - ) - ) AS google_dau_engaged_w_sap, - COUNT( - DISTINCT IF(default_search_engine LIKE '%bing%', client_id, NULL) - ) AS bing_dau_w_engine_as_default, - COUNT( - DISTINCT IF(search_count > 0 AND normalized_engine = 'Bing', client_id, NULL) - ) AS bing_dau_engaged_w_sap, - COUNT( - DISTINCT IF( - default_search_engine LIKE('%ddg%') - OR default_search_engine LIKE('%duckduckgo%'), - client_id, - NULL - ) - ) AS ddg_dau_w_engine_as_default, - COUNT( - DISTINCT IF(normalized_engine = "DuckDuckGo" AND search_count > 0, client_id, NULL) - ) AS ddg_dau_engaged_w_sap + ) AS google_eligible_dau FROM - mobile_dau_data - LEFT JOIN - mobile_baseline_engine - USING (submission_date, client_id) - LEFT JOIN - mobile_baseline_search - USING (submission_date, client_id, default_search_engine) + `mozdata.telemetry.mobile_active_users` + WHERE + submission_date = @submission_date + AND is_dau + # not including Fenix MozillaOnline, BrowserStack, Klar + AND app_name IN ("Focus iOS", "Firefox iOS", "Fenix", "Focus Android") 
GROUP BY + device, submission_date, country ), --- Google Mobile (search only - as mobile search metrics is based on metrics --- ping, while DAU should be based on main ping on Mobile, see also --- https://mozilla-hub.atlassian.net/browse/RS-575) -mobile_data_google AS ( +desktop_mobile_search_dau AS ( SELECT submission_date, + partner, + device, country, - google_eligible_dau, - google_dau_w_engine_as_default, - google_dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Google', sap, 0)) AS sap, - SUM(IF(normalized_engine = 'Google', tagged_sap, 0)) AS tagged_sap, - SUM(IF(normalized_engine = 'Google', tagged_follow_on, 0)) AS tagged_follow_on, - SUM(IF(normalized_engine = 'Google', search_with_ads, 0)) AS search_with_ads, - SUM(IF(normalized_engine = 'Google', ad_click, 0)) AS ad_click, - SUM(IF(normalized_engine = 'Google', organic, 0)) AS organic, - SUM(IF(normalized_engine = 'Google', ad_click_organic, 0)) AS ad_click_organic, - -- metrics do not exist for mobile - 0 AS search_with_ads_organic, - 0 AS monetizable_sap + SUM(dau_w_engine_as_default) AS dau_w_engine_as_default, + SUM(dau_engaged_w_sap) AS dau_engaged_w_sap FROM - `moz-fx-data-shared-prod.search.mobile_search_clients_engines_sources_daily` - INNER JOIN - final_mobile_dau_counts - USING (submission_date, country) + `mozdata.search.search_dau_aggregates` WHERE submission_date = @submission_date - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ) - AND ( - app_name IN ('Fenix', 'Firefox Preview', 'Focus', 'Focus Android Glean', 'Focus iOS Glean') - OR (app_name = 'Fennec' AND os = 'iOS') - ) GROUP BY submission_date, - country, - google_eligible_dau, - google_dau_w_engine_as_default, - google_dau_engaged_w_sap - ORDER BY - submission_date, + partner, + device, country ), --- Bing & DDG Mobile (search only - as mobile search metrics is based on --- metrics ping, while DAU 
should be based on main ping on Mobile, see also --- https://mozilla-hub.atlassian.net/browse/RS-575) -mobile_data_bing_ddg AS ( +combined_search_dau AS ( SELECT submission_date, + partner, + device, country, - eligible_dau, - bing_dau_w_engine_as_default, - bing_dau_engaged_w_sap, - ddg_dau_w_engine_as_default, - ddg_dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Bing', sap, 0)) AS bing_sap, - SUM(IF(normalized_engine = 'Bing', tagged_sap, 0)) AS bing_tagged_sap, - SUM(IF(normalized_engine = 'Bing', tagged_follow_on, 0)) AS bing_tagged_follow_on, - SUM(IF(normalized_engine = 'Bing', search_with_ads, 0)) AS bing_search_with_ads, - SUM(IF(normalized_engine = 'Bing', ad_click, 0)) AS bing_ad_click, - SUM(IF(normalized_engine = 'Bing', organic, 0)) AS bing_organic, - SUM(IF(normalized_engine = 'Bing', ad_click_organic, 0)) AS bing_ad_click_organic, - -- metrics do not exist for mobile - 0 AS bing_search_with_ads_organic, - 0 AS bing_monetizable_sap, - SUM(IF(normalized_engine = 'DuckDuckGo', sap, 0)) AS ddg_sap, - SUM(IF(normalized_engine = 'DuckDuckGo', tagged_sap, 0)) AS ddg_tagged_sap, - SUM(IF(normalized_engine = 'DuckDuckGo', tagged_follow_on, 0)) AS ddg_tagged_follow_on, - SUM(IF(normalized_engine = 'DuckDuckGo', search_with_ads, 0)) AS ddg_search_with_ads, - SUM(IF(normalized_engine = 'DuckDuckGo', ad_click, 0)) AS ddg_ad_click, - SUM(IF(normalized_engine = 'DuckDuckGo', organic, 0)) AS ddg_organic, - SUM(IF(normalized_engine = 'DuckDuckGo', ad_click_organic, 0)) AS ddg_ad_click_organic, - -- metrics do not exist for mobile - 0 AS ddg_search_with_ads_organic, - 0 AS ddg_monetizable_sap + CASE + WHEN partner = "Google" + THEN google_eligible_dau + ELSE global_eligible_dau + END AS dau_eligible_markets, + dau_w_engine_as_default, + dau_engaged_w_sap FROM - `moz-fx-data-shared-prod.search.mobile_search_clients_engines_sources_daily` - INNER JOIN - final_mobile_dau_counts - USING (submission_date, country) - WHERE - submission_date = @submission_date - AND ( - 
app_name IN ('Fenix', 'Firefox Preview', 'Focus', 'Focus Android Glean', 'Focus iOS Glean') - OR (app_name = 'Fennec' AND os = 'iOS') - ) - GROUP BY - submission_date, - country, - eligible_dau, - bing_dau_w_engine_as_default, - bing_dau_engaged_w_sap, - ddg_dau_w_engine_as_default, - ddg_dau_engaged_w_sap - ORDER BY - submission_date, - country + desktop_mobile_search_dau + LEFT JOIN + eligible_markets_dau + USING (submission_date, device, country) ) --- combine all desktop and mobile together SELECT - submission_date, - 'Google' AS partner, - 'desktop' AS device, - channel, - country, - dau, - dau_engaged_w_sap, - sap, - tagged_sap, - tagged_follow_on, - search_with_ads, - ad_click, - organic, - ad_click_organic, - search_with_ads_organic, - monetizable_sap, - dau_w_engine_as_default -FROM - desktop_data_google -UNION ALL -SELECT - submission_date, - 'Bing' AS partner, - 'desktop' AS device, - NULL AS channel, - country, - dau, - dau_engaged_w_sap, - sap, - tagged_sap, - tagged_follow_on, - search_with_ads, - ad_click, - organic, - ad_click_organic, - search_with_ads_organic, - monetizable_sap, - dau_w_engine_as_default -FROM - desktop_data_bing -UNION ALL -SELECT - submission_date, - 'DuckDuckGo' AS partner, - 'desktop' AS device, - NULL AS channel, - country, - dau, - ddg_dau_engaged_w_sap AS dau_engaged_w_sap, - ddg_sap AS sap, - ddg_tagged_sap AS tagged_sap, - ddg_tagged_follow_on AS tagged_follow_on, - ddg_search_with_ads AS search_with_ads, - ddg_ad_click AS ad_click, - ddg_organic AS organic, - ddg_ad_click_organic AS ad_click_organic, - ddg_search_with_ads_organic AS search_with_ads_organic, - ddg_monetizable_sap AS monetizable_sap, - ddg_dau_w_engine_as_default AS dau_w_engine_as_default -FROM - desktop_data_ddg -UNION ALL -SELECT - submission_date, - 'DuckDuckGo' AS partner, - 'extension' AS device, - NULL AS channel, - country, - dau, - ddgaddon_dau_engaged_w_sap AS dau_engaged_w_sap, - ddgaddon_sap AS sap, - ddgaddon_tagged_sap AS tagged_sap, - 
ddgaddon_tagged_follow_on AS tagged_follow_on, - ddgaddon_search_with_ads AS search_with_ads, - ddgaddon_ad_click AS ad_click, - ddgaddon_organic AS organic, - ddgaddon_ad_click_organic AS ad_click_organic, - ddgaddon_search_with_ads_organic AS search_with_ads_organic, - ddgaddon_monetizable_sap AS monetizable_sap, - ddgaddon_dau_w_engine_as_default AS dau_w_engine_as_default -FROM - desktop_data_ddg -UNION ALL -SELECT - submission_date, - 'Google' AS partner, - 'mobile' AS device, - 'n/a' AS channel, - country, - google_eligible_dau AS dau, - google_dau_engaged_w_sap AS dau_engaged_w_sap, - sap, - tagged_sap, - tagged_follow_on, - search_with_ads, - ad_click, - organic, - ad_click_organic, - search_with_ads_organic, - monetizable_sap, - # custom engine bug merged in v121 - # null engine bug merged in v126 - # remove default engine data prior to June 2024 - IF( - submission_date >= "2024-06-01", - google_dau_w_engine_as_default, - NULL - ) AS dau_w_engine_as_default -FROM - mobile_data_google -UNION ALL -SELECT - submission_date, - 'Bing' AS partner, - 'mobile' AS device, - NULL AS channel, - country, - eligible_dau, - bing_dau_engaged_w_sap, - bing_sap, - bing_tagged_sap, - bing_tagged_follow_on, - bing_search_with_ads, - bing_ad_click, - bing_organic, - bing_ad_click_organic, - bing_search_with_ads_organic, - bing_monetizable_sap, - # custom engine bug merged in v121 - # null engine bug merged in v126 - # remove default engine data prior to June 2024 - IF(submission_date >= "2024-06-01", bing_dau_w_engine_as_default, NULL) AS dau_w_engine_as_default -FROM - mobile_data_bing_ddg -UNION ALL -SELECT - submission_date, - 'DuckDuckGo' AS partner, - 'mobile' AS device, - NULL AS channel, - country, - eligible_dau, - ddg_dau_engaged_w_sap, - ddg_sap, - ddg_tagged_sap, - ddg_tagged_follow_on, - ddg_search_with_ads, - ddg_ad_click, - ddg_organic, - ddg_ad_click_organic, - ddg_search_with_ads_organic, - ddg_monetizable_sap, - # custom engine bug merged in v121 - # null 
engine bug merged in v126 - # remove default engine data prior to June 2024 - IF(submission_date >= "2024-06-01", ddg_dau_w_engine_as_default, NULL) AS dau_w_engine_as_default -FROM - mobile_data_bing_ddg + cd.submission_date, + cd.partner, + cd.device, + CAST(NULL AS STRING) AS channel, + cd.country, + du.dau_eligible_markets AS dau, + du.dau_w_engine_as_default, + du.dau_engaged_w_sap, + cd.sap, + cd.tagged_sap, + cd.tagged_follow_on, + cd.search_with_ads, + cd.ad_click, + cd.organic, + cd.ad_click_organic, + cd.search_with_ads_organic, + cd.monetizable_sap +FROM + combined_search_data cd +LEFT JOIN + combined_search_dau du + ON cd.partner = du.partner + AND cd.submission_date = du.submission_date + AND cd.country = du.country + AND cd.device = du.device; ```

Link to full diff

dataops-ci-bot commented 1 month ago

Integration report for "Merge branch 'main' into search_levers_daily_from_aggregates"

sql.diff

Click to expand! ```diff diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_mobile_search.py /tmp/workspace/generated-sql/dags/bqetl_mobile_search.py --- /tmp/workspace/main-generated-sql/dags/bqetl_mobile_search.py 2024-08-27 18:07:05.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_mobile_search.py 2024-08-27 18:08:42.000000000 +0000 @@ -80,6 +80,20 @@ depends_on_past=False, ) + with TaskGroup( + "search_derived__mobile_search_aggregates__v1_external", + ) as search_derived__mobile_search_aggregates__v1_external: + ExternalTaskMarker( + task_id="bqetl_search_dashboard__wait_for_search_derived__mobile_search_aggregates__v1", + external_dag_id="bqetl_search_dashboard", + external_task_id="wait_for_search_derived__mobile_search_aggregates__v1", + execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=77400)).isoformat() }}", + ) + + search_derived__mobile_search_aggregates__v1_external.set_upstream( + search_derived__mobile_search_aggregates__v1 + ) + search_derived__mobile_search_clients_daily__v1 = bigquery_etl_query( task_id="search_derived__mobile_search_clients_daily__v1", destination_table="mobile_search_clients_daily_v1", diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/dags/bqetl_search_dashboard.py /tmp/workspace/generated-sql/dags/bqetl_search_dashboard.py --- /tmp/workspace/main-generated-sql/dags/bqetl_search_dashboard.py 2024-08-27 18:07:05.000000000 +0000 +++ /tmp/workspace/generated-sql/dags/bqetl_search_dashboard.py 2024-08-27 18:08:43.000000000 +0000 @@ -275,6 +275,18 @@ pool="DATA_ENG_EXTERNALTASKSENSOR", ) + wait_for_search_derived__mobile_search_aggregates__v1 = ExternalTaskSensor( + task_id="wait_for_search_derived__mobile_search_aggregates__v1", + external_dag_id="bqetl_mobile_search", + external_task_id="search_derived__mobile_search_aggregates__v1", + execution_delta=datetime.timedelta(seconds=9000), + check_existence=True, + mode="reschedule", + 
allowed_states=ALLOWED_STATES, + failed_states=FAILED_STATES, + pool="DATA_ENG_EXTERNALTASKSENSOR", + ) + search_derived__desktop_search_aggregates_by_userstate__v1 = bigquery_etl_query( task_id="search_derived__desktop_search_aggregates_by_userstate__v1", destination_table="desktop_search_aggregates_by_userstate_v1", @@ -482,13 +494,17 @@ ) search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_copy_deduplicate_all + wait_for_checks__fail_telemetry_derived__clients_last_seen__v2 ) search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_search_derived__mobile_search_clients_daily__v1 + wait_for_search_derived__mobile_search_aggregates__v1 ) search_derived__search_revenue_levers_daily__v1.set_upstream( - wait_for_search_derived__search_clients_daily__v8 + wait_for_search_derived__search_aggregates__v8 + ) + + search_derived__search_revenue_levers_daily__v1.set_upstream( + search_derived__search_dau_aggregates__v1 ) diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql 2024-08-27 18:02:23.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/search_derived/search_revenue_levers_daily_v1/query.sql 2024-08-27 18:02:29.000000000 +0000 @@ -1,276 +1,75 @@ -WITH --- Google Desktop (search + DAU) -desktop_data_google AS ( +WITH combined_search_data AS ( SELECT submission_date, - IF(LOWER(channel) LIKE '%esr%', 'ESR', 'personal') AS channel, country, - COUNT(DISTINCT client_id) AS dau, - COUNT( - DISTINCT IF(default_search_engine LIKE '%google%', client_id, NULL) - ) AS dau_w_engine_as_default, - COUNT( - DISTINCT IF( - sap > 0 - AND normalized_engine = 'Google' - AND 
default_search_engine LIKE '%google%', - client_id, - NULL - ) - ) AS dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Google', sap, 0)) AS sap, - SUM(IF(normalized_engine = 'Google', tagged_sap, 0)) AS tagged_sap, - SUM(IF(normalized_engine = 'Google', tagged_follow_on, 0)) AS tagged_follow_on, - SUM(IF(normalized_engine = 'Google', search_with_ads, 0)) AS search_with_ads, - SUM(IF(normalized_engine = 'Google', ad_click, 0)) AS ad_click, - SUM(IF(normalized_engine = 'Google', organic, 0)) AS organic, - SUM(IF(normalized_engine = 'Google', ad_click_organic, 0)) AS ad_click_organic, - SUM(IF(normalized_engine = 'Google', search_with_ads_organic, 0)) AS search_with_ads_organic, - SUM(IF(normalized_engine = 'Google' AND is_sap_monetizable, sap, 0)) AS monetizable_sap + normalized_engine AS partner, + 'desktop' AS device, + sap, + tagged_sap, + tagged_follow_on, + search_with_ads, + ad_click, + organic, + ad_click_organic, + search_with_ads_organic, + IF(is_sap_monetizable, sap, 0) AS monetizable_sap FROM - `moz-fx-data-shared-prod.search.search_clients_engines_sources_daily` + `moz-fx-data-shared-prod.search.search_aggregates` WHERE submission_date = @submission_date - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ) - GROUP BY - submission_date, - channel, - country - ORDER BY - submission_date, - channel, - country -), --- Bing Desktop (non-Acer) -desktop_data_bing AS ( + UNION ALL SELECT submission_date, country, - COUNT(DISTINCT client_id) AS dau, - COUNT( - DISTINCT IF(default_search_engine LIKE '%bing%', client_id, NULL) - ) AS dau_w_engine_as_default, - COUNT( - DISTINCT IF( - sap > 0 - AND normalized_engine = 'Bing' - AND default_search_engine LIKE '%bing%', - client_id, - NULL - ) - ) AS dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Bing', sap, 0)) AS sap, - SUM(IF(normalized_engine = 'Bing', tagged_sap, 0)) AS tagged_sap, 
- SUM(IF(normalized_engine = 'Bing', tagged_follow_on, 0)) AS tagged_follow_on, - SUM(IF(normalized_engine = 'Bing', search_with_ads, 0)) AS search_with_ads, - SUM(IF(normalized_engine = 'Bing', ad_click, 0)) AS ad_click, - SUM(IF(normalized_engine = 'Bing', organic, 0)) AS organic, - SUM(IF(normalized_engine = 'Bing', ad_click_organic, 0)) AS ad_click_organic, - SUM(IF(normalized_engine = 'Bing', search_with_ads_organic, 0)) AS search_with_ads_organic, - SUM(IF(normalized_engine = 'Bing' AND is_sap_monetizable, sap, 0)) AS monetizable_sap + normalized_engine AS partner, + 'mobile' AS device, + sap, + tagged_sap, + tagged_follow_on, + search_with_ads, + ad_click, + organic, + ad_click_organic, + 0 AS search_with_ads_organic, + 0 AS monetizable_sap FROM - `moz-fx-data-shared-prod.search.search_clients_engines_sources_daily` + `moz-fx-data-shared-prod.search.mobile_search_aggregates` WHERE submission_date = @submission_date - AND (distribution_id IS NULL OR distribution_id NOT LIKE '%acer%') - AND client_id NOT IN (SELECT client_id FROM `moz-fx-data-shared-prod.search.acer_cohort`) - GROUP BY - submission_date, - country - ORDER BY - submission_date, - country ), --- DDG Desktop + Extension -desktop_data_ddg AS ( - SELECT +eligible_markets_dau AS ( + SELECT DISTINCT + "desktop" AS device, submission_date, country, - COUNT(DISTINCT client_id) AS dau, + COUNT(DISTINCT client_id) AS global_eligible_dau, COUNT( DISTINCT IF( ( - (default_search_engine LIKE('%ddg%') OR default_search_engine LIKE('%duckduckgo%')) - AND NOT default_search_engine LIKE('%addon%') - ), - client_id, - NULL - ) - ) AS ddg_dau_w_engine_as_default, - COUNT( - DISTINCT IF( - (engine) IN ('ddg', 'duckduckgo') - AND sap > 0 - AND ( - (default_search_engine LIKE('%ddg%') OR default_search_engine LIKE('%duckduckgo%')) - AND NOT default_search_engine LIKE('%addon%') + (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) + OR (submission_date >= "2023-12-01" AND country 
NOT IN ('RU', 'UA', 'BY', 'CN')) ), client_id, NULL ) - ) AS ddg_dau_engaged_w_sap, - SUM(IF(engine IN ('ddg', 'duckduckgo'), sap, 0)) AS ddg_sap, - SUM(IF(engine IN ('ddg', 'duckduckgo'), tagged_sap, 0)) AS ddg_tagged_sap, - SUM(IF(engine IN ('ddg', 'duckduckgo'), tagged_follow_on, 0)) AS ddg_tagged_follow_on, - SUM(IF(engine IN ('ddg', 'duckduckgo'), search_with_ads, 0)) AS ddg_search_with_ads, - SUM(IF(engine IN ('ddg', 'duckduckgo'), ad_click, 0)) AS ddg_ad_click, - SUM(IF(engine IN ('ddg', 'duckduckgo'), organic, 0)) AS ddg_organic, - SUM(IF(engine IN ('ddg', 'duckduckgo'), ad_click_organic, 0)) AS ddg_ad_click_organic, - SUM( - IF(engine IN ('ddg', 'duckduckgo'), search_with_ads_organic, 0) - ) AS ddg_search_with_ads_organic, - SUM(IF(engine IN ('ddg', 'duckduckgo') AND is_sap_monetizable, sap, 0)) AS ddg_monetizable_sap, - -- in-content probes not available for addon so these metrics although being here will be zero - COUNT( - DISTINCT IF(default_search_engine LIKE('ddg%addon'), client_id, NULL) - ) AS ddgaddon_dau_w_engine_as_default, - COUNT( - DISTINCT IF( - engine = 'ddg-addon' - AND sap > 0 - AND default_search_engine LIKE('ddg%addon'), - client_id, - NULL - ) - ) AS ddgaddon_dau_engaged_w_sap, - SUM(IF(engine IN ('ddg-addon'), sap, 0)) AS ddgaddon_sap, - SUM(IF(engine IN ('ddg-addon'), tagged_sap, 0)) AS ddgaddon_tagged_sap, - SUM(IF(engine IN ('ddg-addon'), tagged_follow_on, 0)) AS ddgaddon_tagged_follow_on, - SUM(IF(engine IN ('ddg-addon'), search_with_ads, 0)) AS ddgaddon_search_with_ads, - SUM(IF(engine IN ('ddg-addon'), ad_click, 0)) AS ddgaddon_ad_click, - SUM(IF(engine IN ('ddg-addon'), organic, 0)) AS ddgaddon_organic, - SUM(IF(engine IN ('ddg-addon'), ad_click_organic, 0)) AS ddgaddon_ad_click_organic, - SUM( - IF(engine IN ('ddg-addon'), search_with_ads_organic, 0) - ) AS ddgaddon_search_with_ads_organic, - SUM(IF(engine IN ('ddg-addon') AND is_sap_monetizable, sap, 0)) AS ddgaddon_monetizable_sap + ) AS google_eligible_dau FROM - 
`moz-fx-data-shared-prod.search.search_clients_engines_sources_daily` + `mozdata.telemetry.desktop_active_users` WHERE submission_date = @submission_date + AND is_dau + # not including Mozilla Online + AND app_name = "Firefox Desktop" GROUP BY + device, submission_date, country - ORDER BY - submission_date, - country -), --- Mobile DAU data -- merging baseline clients to AUA clients -## baseline ping -- mobile default search engine by client id -mobile_baseline_engine AS ( - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine_code AS default_search_engine, - FROM - `moz-fx-data-shared-prod.fenix.baseline` - WHERE - DATE(submission_timestamp) = @submission_date - UNION ALL - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine - FROM - `moz-fx-data-shared-prod.firefox_ios.baseline` - WHERE - DATE(submission_timestamp) = @submission_date - UNION ALL - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.browser_default_search_engine AS default_search_engine - FROM - `moz-fx-data-shared-prod.focus_android.baseline` - WHERE - DATE(submission_timestamp) = @submission_date - UNION ALL - SELECT DISTINCT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine - FROM - `moz-fx-data-shared-prod.focus_ios.baseline` - WHERE - DATE(submission_timestamp) = @submission_date -), -## baseline ping search counts -- mobile search counts by client id -mobile_baseline_search AS ( - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine_code AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - 
`moz-fx-data-shared-prod.fenix.baseline`, - UNNEST(metrics.labeled_counter.metrics_search_count) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 - UNION ALL - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - `moz-fx-data-shared-prod.firefox_ios.baseline`, - UNNEST(metrics.labeled_counter.search_counts) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 UNION ALL - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.browser_default_search_engine AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - `moz-fx-data-shared-prod.focus_android.baseline`, - UNNEST(metrics.labeled_counter.browser_search_search_count) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 - UNION ALL - SELECT - DATE(submission_timestamp) AS submission_date, - client_info.client_id, - metrics.string.search_default_engine AS default_search_engine, - `moz-fx-data-shared-prod.udf.normalize_search_engine`(key_value.key) AS normalized_engine, - key_value.value AS search_count - FROM - `moz-fx-data-shared-prod.focus_ios.baseline`, - UNNEST(metrics.labeled_counter.browser_search_search_count) AS key_value - WHERE - DATE(submission_timestamp) = @submission_date - AND key_value.value <= 10000 -), -## baseline-powered clients who qualify for KPI (activity filters applied) -mobile_dau_data AS ( SELECT DISTINCT - submission_date, "mobile" AS device, - country, - client_id - FROM - `mozdata.telemetry.mobile_active_users` - WHERE - submission_date = @submission_date - AND is_dau - # 
not including Fenix MozillaOnline, BrowserStack, Klar - AND app_name IN ("Focus iOS", "Firefox iOS", "Fenix", "Focus Android") -), -final_mobile_dau_counts AS ( - SELECT submission_date, country, - COUNT(DISTINCT client_id) AS eligible_dau, + COUNT(DISTINCT client_id) AS global_eligible_dau, COUNT( DISTINCT IF( ( @@ -280,316 +79,79 @@ client_id, NULL ) - ) AS google_eligible_dau, - COUNT( - DISTINCT IF( - default_search_engine LIKE '%google%' - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ), - client_id, - NULL - ) - ) AS google_dau_w_engine_as_default, - COUNT( - DISTINCT IF( - search_count > 0 - AND normalized_engine = 'Google' - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ), - client_id, - NULL - ) - ) AS google_dau_engaged_w_sap, - COUNT( - DISTINCT IF(default_search_engine LIKE '%bing%', client_id, NULL) - ) AS bing_dau_w_engine_as_default, - COUNT( - DISTINCT IF(search_count > 0 AND normalized_engine = 'Bing', client_id, NULL) - ) AS bing_dau_engaged_w_sap, - COUNT( - DISTINCT IF( - default_search_engine LIKE('%ddg%') - OR default_search_engine LIKE('%duckduckgo%'), - client_id, - NULL - ) - ) AS ddg_dau_w_engine_as_default, - COUNT( - DISTINCT IF(normalized_engine = "DuckDuckGo" AND search_count > 0, client_id, NULL) - ) AS ddg_dau_engaged_w_sap + ) AS google_eligible_dau FROM - mobile_dau_data - LEFT JOIN - mobile_baseline_engine - USING (submission_date, client_id) - LEFT JOIN - mobile_baseline_search - USING (submission_date, client_id, default_search_engine) + `mozdata.telemetry.mobile_active_users` + WHERE + submission_date = @submission_date + AND is_dau + # not including Fenix MozillaOnline, BrowserStack, Klar + AND app_name IN ("Focus iOS", "Firefox iOS", "Fenix", "Focus Android") 
GROUP BY + device, submission_date, country ), --- Google Mobile (search only - as mobile search metrics is based on metrics --- ping, while DAU should be based on main ping on Mobile, see also --- https://mozilla-hub.atlassian.net/browse/RS-575) -mobile_data_google AS ( +desktop_mobile_search_dau AS ( SELECT submission_date, + partner, + device, country, - google_eligible_dau, - google_dau_w_engine_as_default, - google_dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Google', sap, 0)) AS sap, - SUM(IF(normalized_engine = 'Google', tagged_sap, 0)) AS tagged_sap, - SUM(IF(normalized_engine = 'Google', tagged_follow_on, 0)) AS tagged_follow_on, - SUM(IF(normalized_engine = 'Google', search_with_ads, 0)) AS search_with_ads, - SUM(IF(normalized_engine = 'Google', ad_click, 0)) AS ad_click, - SUM(IF(normalized_engine = 'Google', organic, 0)) AS organic, - SUM(IF(normalized_engine = 'Google', ad_click_organic, 0)) AS ad_click_organic, - -- metrics do not exist for mobile - 0 AS search_with_ads_organic, - 0 AS monetizable_sap + SUM(dau_w_engine_as_default) AS dau_w_engine_as_default, + SUM(dau_engaged_w_sap) AS dau_engaged_w_sap FROM - `moz-fx-data-shared-prod.search.mobile_search_clients_engines_sources_daily` - INNER JOIN - final_mobile_dau_counts - USING (submission_date, country) + `mozdata.search.search_dau_aggregates` WHERE submission_date = @submission_date - AND ( - (submission_date < "2023-12-01" AND country NOT IN ('RU', 'UA', 'TR', 'BY', 'KZ', 'CN')) - OR (submission_date >= "2023-12-01" AND country NOT IN ('RU', 'UA', 'BY', 'CN')) - ) - AND ( - app_name IN ('Fenix', 'Firefox Preview', 'Focus', 'Focus Android Glean', 'Focus iOS Glean') - OR (app_name = 'Fennec' AND os = 'iOS') - ) GROUP BY submission_date, - country, - google_eligible_dau, - google_dau_w_engine_as_default, - google_dau_engaged_w_sap - ORDER BY - submission_date, + partner, + device, country ), --- Bing & DDG Mobile (search only - as mobile search metrics is based on --- metrics ping, while DAU 
should be based on main ping on Mobile, see also --- https://mozilla-hub.atlassian.net/browse/RS-575) -mobile_data_bing_ddg AS ( +combined_search_dau AS ( SELECT submission_date, + partner, + device, country, - eligible_dau, - bing_dau_w_engine_as_default, - bing_dau_engaged_w_sap, - ddg_dau_w_engine_as_default, - ddg_dau_engaged_w_sap, - SUM(IF(normalized_engine = 'Bing', sap, 0)) AS bing_sap, - SUM(IF(normalized_engine = 'Bing', tagged_sap, 0)) AS bing_tagged_sap, - SUM(IF(normalized_engine = 'Bing', tagged_follow_on, 0)) AS bing_tagged_follow_on, - SUM(IF(normalized_engine = 'Bing', search_with_ads, 0)) AS bing_search_with_ads, - SUM(IF(normalized_engine = 'Bing', ad_click, 0)) AS bing_ad_click, - SUM(IF(normalized_engine = 'Bing', organic, 0)) AS bing_organic, - SUM(IF(normalized_engine = 'Bing', ad_click_organic, 0)) AS bing_ad_click_organic, - -- metrics do not exist for mobile - 0 AS bing_search_with_ads_organic, - 0 AS bing_monetizable_sap, - SUM(IF(normalized_engine = 'DuckDuckGo', sap, 0)) AS ddg_sap, - SUM(IF(normalized_engine = 'DuckDuckGo', tagged_sap, 0)) AS ddg_tagged_sap, - SUM(IF(normalized_engine = 'DuckDuckGo', tagged_follow_on, 0)) AS ddg_tagged_follow_on, - SUM(IF(normalized_engine = 'DuckDuckGo', search_with_ads, 0)) AS ddg_search_with_ads, - SUM(IF(normalized_engine = 'DuckDuckGo', ad_click, 0)) AS ddg_ad_click, - SUM(IF(normalized_engine = 'DuckDuckGo', organic, 0)) AS ddg_organic, - SUM(IF(normalized_engine = 'DuckDuckGo', ad_click_organic, 0)) AS ddg_ad_click_organic, - -- metrics do not exist for mobile - 0 AS ddg_search_with_ads_organic, - 0 AS ddg_monetizable_sap + CASE + WHEN partner = "Google" + THEN google_eligible_dau + ELSE global_eligible_dau + END AS dau_eligible_markets, + dau_w_engine_as_default, + dau_engaged_w_sap FROM - `moz-fx-data-shared-prod.search.mobile_search_clients_engines_sources_daily` - INNER JOIN - final_mobile_dau_counts - USING (submission_date, country) - WHERE - submission_date = @submission_date - AND ( - 
app_name IN ('Fenix', 'Firefox Preview', 'Focus', 'Focus Android Glean', 'Focus iOS Glean') - OR (app_name = 'Fennec' AND os = 'iOS') - ) - GROUP BY - submission_date, - country, - eligible_dau, - bing_dau_w_engine_as_default, - bing_dau_engaged_w_sap, - ddg_dau_w_engine_as_default, - ddg_dau_engaged_w_sap - ORDER BY - submission_date, - country + desktop_mobile_search_dau + LEFT JOIN + eligible_markets_dau + USING (submission_date, device, country) ) --- combine all desktop and mobile together SELECT - submission_date, - 'Google' AS partner, - 'desktop' AS device, - channel, - country, - dau, - dau_engaged_w_sap, - sap, - tagged_sap, - tagged_follow_on, - search_with_ads, - ad_click, - organic, - ad_click_organic, - search_with_ads_organic, - monetizable_sap, - dau_w_engine_as_default -FROM - desktop_data_google -UNION ALL -SELECT - submission_date, - 'Bing' AS partner, - 'desktop' AS device, - NULL AS channel, - country, - dau, - dau_engaged_w_sap, - sap, - tagged_sap, - tagged_follow_on, - search_with_ads, - ad_click, - organic, - ad_click_organic, - search_with_ads_organic, - monetizable_sap, - dau_w_engine_as_default -FROM - desktop_data_bing -UNION ALL -SELECT - submission_date, - 'DuckDuckGo' AS partner, - 'desktop' AS device, - NULL AS channel, - country, - dau, - ddg_dau_engaged_w_sap AS dau_engaged_w_sap, - ddg_sap AS sap, - ddg_tagged_sap AS tagged_sap, - ddg_tagged_follow_on AS tagged_follow_on, - ddg_search_with_ads AS search_with_ads, - ddg_ad_click AS ad_click, - ddg_organic AS organic, - ddg_ad_click_organic AS ad_click_organic, - ddg_search_with_ads_organic AS search_with_ads_organic, - ddg_monetizable_sap AS monetizable_sap, - ddg_dau_w_engine_as_default AS dau_w_engine_as_default -FROM - desktop_data_ddg -UNION ALL -SELECT - submission_date, - 'DuckDuckGo' AS partner, - 'extension' AS device, - NULL AS channel, - country, - dau, - ddgaddon_dau_engaged_w_sap AS dau_engaged_w_sap, - ddgaddon_sap AS sap, - ddgaddon_tagged_sap AS tagged_sap, - 
ddgaddon_tagged_follow_on AS tagged_follow_on, - ddgaddon_search_with_ads AS search_with_ads, - ddgaddon_ad_click AS ad_click, - ddgaddon_organic AS organic, - ddgaddon_ad_click_organic AS ad_click_organic, - ddgaddon_search_with_ads_organic AS search_with_ads_organic, - ddgaddon_monetizable_sap AS monetizable_sap, - ddgaddon_dau_w_engine_as_default AS dau_w_engine_as_default -FROM - desktop_data_ddg -UNION ALL -SELECT - submission_date, - 'Google' AS partner, - 'mobile' AS device, - 'n/a' AS channel, - country, - google_eligible_dau AS dau, - google_dau_engaged_w_sap AS dau_engaged_w_sap, - sap, - tagged_sap, - tagged_follow_on, - search_with_ads, - ad_click, - organic, - ad_click_organic, - search_with_ads_organic, - monetizable_sap, - # custom engine bug merged in v121 - # null engine bug merged in v126 - # remove default engine data prior to June 2024 - IF( - submission_date >= "2024-06-01", - google_dau_w_engine_as_default, - NULL - ) AS dau_w_engine_as_default -FROM - mobile_data_google -UNION ALL -SELECT - submission_date, - 'Bing' AS partner, - 'mobile' AS device, - NULL AS channel, - country, - eligible_dau, - bing_dau_engaged_w_sap, - bing_sap, - bing_tagged_sap, - bing_tagged_follow_on, - bing_search_with_ads, - bing_ad_click, - bing_organic, - bing_ad_click_organic, - bing_search_with_ads_organic, - bing_monetizable_sap, - # custom engine bug merged in v121 - # null engine bug merged in v126 - # remove default engine data prior to June 2024 - IF(submission_date >= "2024-06-01", bing_dau_w_engine_as_default, NULL) AS dau_w_engine_as_default -FROM - mobile_data_bing_ddg -UNION ALL -SELECT - submission_date, - 'DuckDuckGo' AS partner, - 'mobile' AS device, - NULL AS channel, - country, - eligible_dau, - ddg_dau_engaged_w_sap, - ddg_sap, - ddg_tagged_sap, - ddg_tagged_follow_on, - ddg_search_with_ads, - ddg_ad_click, - ddg_organic, - ddg_ad_click_organic, - ddg_search_with_ads_organic, - ddg_monetizable_sap, - # custom engine bug merged in v121 - # null 
engine bug merged in v126 - # remove default engine data prior to June 2024 - IF(submission_date >= "2024-06-01", ddg_dau_w_engine_as_default, NULL) AS dau_w_engine_as_default -FROM - mobile_data_bing_ddg + cd.submission_date, + cd.partner, + cd.device, + CAST(NULL AS STRING) AS channel, + cd.country, + du.dau_eligible_markets AS dau, + du.dau_w_engine_as_default, + du.dau_engaged_w_sap, + cd.sap, + cd.tagged_sap, + cd.tagged_follow_on, + cd.search_with_ads, + cd.ad_click, + cd.organic, + cd.ad_click_organic, + cd.search_with_ads_organic, + cd.monetizable_sap +FROM + combined_search_data cd +LEFT JOIN + combined_search_dau du + ON cd.partner = du.partner + AND cd.submission_date = du.submission_date + AND cd.country = du.country + AND cd.device = du.device; ```

Link to full diff